In [1]:
# Lemma. max_cluster for GA SS is either 31 or 32. 

# Theorem. max_cluster for GA SS is 31. 
# Proof idea. We show that 32 clusters is not achievable by breaking the 
#   problem into cases, each of which has cluster_UB <= 31.

import networkx as nx
from gerrychain import Graph

# Problem instance studied in this notebook: state 'GA', district type 'SS'
# (presumably Georgia's state senate -- confirm against the util module).
state, district_type = 'GA', 'SS'

# Location of the county-level graph file for the chosen state.
filepath = 'C:\\districting-data-2020\\'
filename = state + '_county.json'

# County graph; every later cell works on GC or on contracted copies of it.
county_graph_path = filepath + filename
GC = Graph.from_json( county_graph_path )

In [2]:
# # TESTS THE FUNCTION: nx.contracted_nodes()
# import matplotlib.pyplot as plt

# #####################
# print("cycle graph:")
# cycle = nx.cycle_graph(5)
# names = ['zero', 'one', 'two', 'three', 'four']
# for i in cycle.nodes:
#     cycle.nodes[i]['TOTPOP'] = 1
#     cycle.nodes[i]['NAME20'] = names[i]
#     print(i,cycle.nodes[i]['TOTPOP'],cycle.nodes[i]['NAME20'])
    
# plt.figure(1)
# nx.draw(cycle,with_labels=True)


# #####################
# print("contracted cycle graph:")
# contracted_cycle = nx.contracted_nodes(cycle,1,2,self_loops=False)
# contracted_cycle.nodes[1]['TOTPOP'] += cycle.nodes[2]['TOTPOP']

# for i in contracted_cycle.nodes():
#     print(i,contracted_cycle.nodes[i]['TOTPOP'],contracted_cycle.nodes[i]['NAME20'])
    
# plt.figure(2)
# nx.draw(contracted_cycle,with_labels=True)

# #####################
# print("cycle graph (again):")
# for i in cycle.nodes:
#     print(i,cycle.nodes[i]['TOTPOP'],cycle.nodes[i]['NAME20'])
# plt.figure(3)
# nx.draw(cycle,with_labels=True)

In [3]:
# Facts:
#  1. Dade County is a leaf vertex in the county graph and its population (16251) is 
#     less than L (so it can't be in a cluster by itself), so we might as well 
#     merge it with its neighbor Walker County. 

from util import update_attributes, get_k_L_U
# Project helpers (defined in util, not shown here). Judging from the printed
# output, get_k_L_U reports k together with the population bounds L and U.
update_attributes(GC, state)   
(k,L,U) = get_k_L_U(GC, state, district_type)

# Look Dade County up by name. Indexing the name->node map raises a KeyError
# right here if the county is missing, instead of leaving `d` unbound (or stale
# from an earlier run) for a later cell to trip over.
node_of = { GC.nodes[i]['NAME20'] : i for i in GC.nodes }
d = node_of['Dade']

neighbors = list(GC.neighbors(d))
print("Dade County has population:",GC.nodes[d]['TOTPOP'],"and its neighbors are:",neighbors)

# Fact 1 is what justifies merging Dade into its neighbor, and the merge cell
# takes neighbors[0] as *the* neighbor, so verify both halves of the claim.
assert len(neighbors) == 1, "Dade County is expected to be a leaf of the county graph"
assert GC.nodes[d]['TOTPOP'] < L, "Dade County is expected to be too small to form a cluster by itself"

Starting GA with k = 56 and deviation = 0.1
Thus, we have L = 181720 and U = 200848
Dade County has population: 16251 and its neighbors are: [62]


In [4]:
# merge Dade County into Walker County:
w = neighbors[0]

# Guard against hidden kernel state: `d` is rebound to a different county later
# in this notebook, and contracted_nodes does not require the two nodes to be
# adjacent, so re-running this cell out of order would otherwise silently merge
# the wrong county into w.
assert d in GC.nodes and GC.nodes[d]['NAME20'] == 'Dade', "d must still refer to Dade County when this cell runs"

# Read Dade's population first, contract into a fresh graph, and only then
# credit the population to w, so the graph passed in is never modified.
dade_pop = GC.nodes[d]['TOTPOP']
GC = nx.contracted_nodes(GC, w, d, self_loops=False)
GC.nodes[w]['TOTPOP'] += dade_pop

In [5]:
import math
def cluster_UB(G, U, k):
    """Return k minus the sum over all nodes of floor(TOTPOP / (U+1)).

    The notebook uses this value as an upper bound on the number of clusters.
    A node whose 'TOTPOP' is at most U contributes nothing; a node whose
    population exceeds U lowers the bound by floor(TOTPOP / (U+1)).

    Args:
        G: graph whose nodes each carry a 'TOTPOP' attribute.
        U: population upper bound (the U reported by get_k_L_U in this notebook).
        k: the count the reduction is subtracted from (the k reported by
           get_k_L_U in this notebook).

    Returns:
        The integer k - sum_i floor(TOTPOP_i / (U+1)); equals k for a graph
        with no nodes.
    """
    threshold = U + 1
    reduction = 0
    for node in G.nodes:
        reduction += math.floor( G.nodes[node]['TOTPOP'] / threshold )
    return k - reduction

In [6]:
# 2. Douglas County has population 144237, which is less than L, and so 
#    must belong to the same cluster as one of its four neighbors. 

# Look Douglas County up by name. A missing name raises a KeyError here rather
# than silently leaving `d` pointing at the county found by an earlier cell.
node_of = { GC.nodes[i]['NAME20'] : i for i in GC.nodes }
d = node_of['Douglas']

# The case split that follows is only valid if Douglas cannot be a cluster by itself.
assert GC.nodes[d]['TOTPOP'] < L, "Douglas County is expected to have population below L"

nodes = list(GC.neighbors(d))
names = [ GC.nodes[i]['NAME20'] for i in nodes ]
print("Douglas County has population:",GC.nodes[d]['TOTPOP'],"and its neighbors are:",names,"or",nodes)

Douglas County has population: 144237 and its neighbors are: ['Fulton', 'Cobb', 'Carroll', 'Paulding'] or [7, 9, 19, 22]


In [7]:
# This creates 4 cases, depending on which neighbor is merged into Douglas:
# One case per neighbor of Douglas (node d): the neighbor is absorbed into d
# and its population is added to d's. GC itself is left as it was, because
# contracted_nodes returns a new graph by default.
cases = []

for nbr in list(GC.neighbors(d)):
    nbr_pop = GC.nodes[nbr]['TOTPOP']
    merged = nx.contracted_nodes(GC, d, nbr, self_loops=False)
    merged.nodes[d]['TOTPOP'] = merged.nodes[d]['TOTPOP'] + nbr_pop
    cases.append(merged)

In [8]:
# So far, each case has cluster_UB = 32.
# Report the cluster upper bound of each case, one per line.
for bound in ( cluster_UB(graph, U, k) for graph in cases ):
    print(bound)

32
32
32
32


In [9]:
# 3. Now, Catoosa County has population 67872, which is less than L, and 
#    so must belong to the same cluster as one of its two neighbors:
#       (wh) Whitfield (pop 102864), and
#       (w)  Walker (pop 67654, plus Dade's 16251). 

# Look both counties up by name. A missing name raises a KeyError here instead
# of leaving `c` or `wh` unbound for a later cell to stumble over.
# (`wh` is not read by the cells shown here; it is kept for any later use.)
node_of = { GC.nodes[i]['NAME20'] : i for i in GC.nodes }
c  = node_of['Catoosa']
wh = node_of['Whitfield']

# For each neighbor of Catoosa: node id, name, its population, and the combined population.
for i in GC.neighbors(c):
    print(i,GC.nodes[i]['NAME20'],GC.nodes[i]['TOTPOP'],"+",GC.nodes[c]['TOTPOP'],"=",GC.nodes[i]['TOTPOP']+GC.nodes[c]['TOTPOP'])

# Branching on Catoosa's neighbors is only valid if Catoosa cannot be a cluster by itself.
assert GC.nodes[c]['TOTPOP'] < L, "Catoosa County is expected to have population below L"

53 Whitfield 102864 + 67872 = 170736
62 Walker 83905 + 67872 = 151777


In [10]:
# Thus, we can break the 4 cases into 8 (sub)cases:
# Every (case, neighbor-of-c) pair becomes one new case, in the same order as a
# nested loop over cases and then over c's neighbors would visit them.
branch_points = [ (graph, nbr) for graph in cases for nbr in graph.neighbors(c) ]

new_cases = []
for graph, nbr in branch_points:
    nbr_pop = graph.nodes[nbr]['TOTPOP']
    # absorb nbr into c in a fresh graph, then credit its population to c
    merged = nx.contracted_nodes( graph, c, nbr, self_loops=False)
    merged.nodes[c]['TOTPOP'] = merged.nodes[c]['TOTPOP'] + nbr_pop
    new_cases.append( merged )

# NOTE: `cases` is rebound here, so re-running this cell branches once more.
cases = new_cases

In [11]:
# So far, each subcase has cluster_UB = 32.
# Report the cluster upper bound of each subcase, one per line.
for graph in cases:
    bound = cluster_UB(graph, U, k)
    print(bound)

32
32
32
32
32
32
32
32


In [12]:
# 4. Next, in each (sub)case, the population of the (merged) c is still less than L.
#    It is either 170736 (if Whitfield merged in) or 151777 (if Walker merged in).
# Population of the merged node c in every (sub)case, printed one per line.
merged_pops = [ case.nodes[c]['TOTPOP'] for case in cases ]
for pop in merged_pops:
    print(pop)

# Another round of branching on c's neighbors is only justified while c is
# still too small to be a cluster, so enforce that instead of reading it off
# the printed numbers.
assert all( pop < L for pop in merged_pops ), "merged c is expected to stay below L in every (sub)case"

170736
151777
170736
151777
170736
151777
170736
151777


In [13]:
# So, we can divide the (sub)cases into new (subsub)cases, depending on which neighbor is merged into c
# Pair each current case with each neighbor of c; the order matches a nested
# loop over cases and then over c's neighbors.
branch_points = [ (graph, nbr) for graph in cases for nbr in graph.neighbors(c) ]

new_cases = []
for graph, nbr in branch_points:
    nbr_pop = graph.nodes[nbr]['TOTPOP']
    # contract nbr into c (new graph), then add nbr's population to c
    merged = nx.contracted_nodes( graph, c, nbr, self_loops=False)
    merged.nodes[c]['TOTPOP'] = merged.nodes[c]['TOTPOP'] + nbr_pop
    new_cases.append( merged )

# NOTE: `cases` is rebound here, so re-running this cell branches once more.
cases = new_cases

In [14]:
# Most of the cases now have cluster_UB = 31, so we can remove them.
# Compute each bound once, print it, and reuse it for the filter below.
bounds = [ cluster_UB(graph, U, k) for graph in cases ]
for bound in bounds:
    print( bound )

# Keep only the cases in which 32 clusters have not been ruled out yet.
cases = [ graph for graph, bound in zip(cases, bounds) if bound >= 32 ]
print("Number of remaining (subsub)cases =",len(cases))

31
31
31
31
31
31
32
31
31
31
31
31
31
32
31
31
31
31
31
31
32
31
31
31
31
31
31
32
Number of remaining (subsub)cases = 4


In [15]:
# 5. Again, the population of the (merged) c is still less than L. 
#    So, we can create new (subsubsub)cases depending on which neighbor is merged into c.
# Population of the merged node c in every remaining case, printed one per line.
merged_pops = [ case.nodes[c]['TOTPOP'] for case in cases ]
for pop in merged_pops:
    print(pop)

# The final round of branching is only valid while c is still below L;
# enforce it rather than relying on a visual check of the output.
assert all( pop < L for pop in merged_pops ), "merged c is expected to stay below L in every remaining case"

176742
176742
176742
176742


In [16]:
# Final branching round: one new case per (remaining case, neighbor of c) pair,
# visited in the same order as a nested loop over cases and c's neighbors.
branch_points = [ (graph, nbr) for graph in cases for nbr in graph.neighbors(c) ]

new_cases = []
for graph, nbr in branch_points:
    nbr_pop = graph.nodes[nbr]['TOTPOP']
    # contract nbr into c (new graph), then add nbr's population to c
    merged = nx.contracted_nodes( graph, c, nbr, self_loops=False)
    merged.nodes[c]['TOTPOP'] = merged.nodes[c]['TOTPOP'] + nbr_pop
    new_cases.append( merged )

# NOTE: `cases` is rebound here, so re-running this cell branches once more.
cases = new_cases

In [17]:
# Their cluster_UB's are all 31. So, it is impossible to get 32 clusters!
# Print the bound of every remaining case, one per line.
bounds = [ cluster_UB(case, U, k) for case in cases ]
for bound in bounds:
    print(bound)

# The proof concludes only if no remaining case can still reach 32 clusters,
# so make that conclusion a checked statement rather than a visual one.
assert all( bound <= 31 for bound in bounds ), "some case still admits 32 clusters; the proof is incomplete"

31
31
31
31
31
31
31
31
31
31
31
31
