In [1]:
# Input directory: the per-state '<ST>_county.json', '<ST>_<level>.json' and
# '<ST>_<level>.shp' files are read from here. The trailing backslash matters
# because file names are appended by plain string concatenation.
filepath = 'C:\\districting-data-2020\\'

# 1. Code takes Daryl's tract/block-level .json files and
#    adds the lat/lon coordinates to the nodes
#    (based on the census shapefiles).
#    The coordinate-annotated graphs are written to this directory.
#
outfilepath1 = 'C:\\districting-data-2020-lat-lon\\'

# 2. Then connects any disconnected graphs by adding
#    a minimum length subset of edges.
#    The coordinate-annotated *and connected* graphs are written to this directory.
#
outfilepath2 = 'C:\\districting-data-2020-conn\\'

# Geographic granularity to process; it is spliced into the input/output
# file names ('<ST>_block.json', '<ST>_tract.shp', ...).
level = 'block'   # 'block' or 'tract'

In [2]:
import geopandas as gpd
from gerrychain import Graph
import networkx as nx

In [3]:
# Two-letter state abbreviation -> two-digit state code (kept as a string so
# leading zeros such as '02' survive). The order of the pairs is significant:
# the states are processed in exactly this (insertion) order.
state_codes = dict([
    ('WA', '53'), ('DE', '10'), ('WI', '55'), ('WV', '54'), ('HI', '15'),
    ('FL', '12'), ('WY', '56'), ('NJ', '34'), ('NM', '35'), ('TX', '48'),
    ('LA', '22'), ('NC', '37'), ('ND', '38'), ('NE', '31'), ('TN', '47'),
    ('NY', '36'), ('PA', '42'), ('AK', '02'), ('NV', '32'), ('NH', '33'),
    ('VA', '51'), ('CO', '08'), ('CA', '06'), ('AL', '01'), ('AR', '05'),
    ('VT', '50'), ('IL', '17'), ('GA', '13'), ('IN', '18'), ('IA', '19'),
    ('MA', '25'), ('AZ', '04'), ('ID', '16'), ('CT', '09'), ('ME', '23'),
    ('MD', '24'), ('OK', '40'), ('OH', '39'), ('UT', '49'), ('MO', '29'),
    ('MN', '27'), ('MI', '26'), ('RI', '44'), ('KS', '20'), ('MT', '30'),
    ('MS', '28'), ('SC', '45'), ('KY', '21'), ('OR', '41'), ('SD', '46'),
])

In [4]:
import json
from networkx.readwrite import json_graph

def write_graph_to_json(graph, json_file):
    """Save `graph` to the file `json_file` as JSON.

    The graph is first converted with networkx's `json_graph.adjacency_data`
    and the resulting structure is dumped with `json.dump`. The target file
    is opened in "w" mode, so an existing file is overwritten.

    Parameters:
        graph: the graph to serialize (passed straight to `adjacency_data`).
        json_file: path of the output file.

    Returns:
        None.
    """
    adjacency = json_graph.adjacency_data(graph)
    with open(json_file, "w") as handle:
        json.dump(adjacency, handle)

In [5]:
from math import cos, asin, sqrt, pi

# get approximate distance between lat/long coordinates:
# https://stackoverflow.com/questions/27928/calculate-distance-between-two-latitude-longitude-points-haversine-formula?noredirect=1&lq=1
def apx_dist(lat1, lon1, lat2, lon2):
    """Approximate great-circle distance between two lat/lon points (haversine).

    Parameters:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.

    Returns:
        The distance as a float. The scale factor 12.742 plays the role of
        2*R in the haversine formula; with Earth's radius of about 6371 km
        this makes the unit thousands of kilometres. Callers here only
        compare distances against each other, so the unit does not matter.
    """
    deg_to_rad = pi/180
    # Half-cosine of the latitude difference.
    lat_term = cos((lat2-lat1)*deg_to_rad)/2
    # Longitude contribution, scaled by the cosines of both latitudes.
    lon_term = cos(lat1*deg_to_rad) * cos(lat2*deg_to_rad) * (1-cos((lon2-lon1)*deg_to_rad))/2
    haversine = 0.5 - lat_term + lon_term
    return 12.742 * asin(sqrt(haversine))

In [6]:
def _connect_components(G, components):
    """Join the given node sets of G with a minimum-total-length set of new edges.

    Builds a complete auxiliary graph H with one vertex per component. Each
    H-edge (i, j) is weighted by the smallest `apx_dist` between any node of
    component i and any node of component j, and remembers the pair of G-nodes
    achieving it. A minimum spanning tree of H then selects which of those
    closest pairs are added to G as edges.

    Parameters:
        G: graph whose nodes carry 'INTPTLAT20', 'INTPTLON20' and 'GEOID20'
           attributes; it is modified in place.
        components: list of node collections (one per connected component).

    Returns:
        The list of (u, v) edges that were added to G.
    """
    H = nx.complete_graph(len(components))

    for c, component in enumerate(components):
        print("Component",c,"has this many nodes:",len(component))

    for i,j in H.edges:

        # closest pair of nodes between component i and component j
        min_dist = float('inf')
        min_dist_edge = None

        for u in components[i]:

            lat1 = G.nodes[u]['INTPTLAT20']
            lon1 = G.nodes[u]['INTPTLON20']

            for v in components[j]:

                lat2 = G.nodes[v]['INTPTLAT20']
                lon2 = G.nodes[v]['INTPTLON20']

                dist = apx_dist(lat1, lon1, lat2, lon2)

                # strict '<' keeps the first pair found on ties
                if dist < min_dist:
                    min_dist = dist
                    min_dist_edge = (u,v)

        H.edges[i,j]['weight'] = min_dist
        H.edges[i,j]['G_edge'] = min_dist_edge

    # find min span tree of H
    T = nx.minimum_spanning_tree(H,weight='weight')
    print("Spanning tree edges in H are =",T.edges)

    # which edges does this correspond to in G?
    edges_to_add = [ T.edges[i,j]['G_edge'] for i,j in T.edges ]
    print("Adding these edges to G =",edges_to_add)
    G.add_edges_from(edges_to_add)

    # print their GEOIDs for later reference
    added_edges_geoids = [ ( G.nodes[i]['GEOID20'], G.nodes[j]['GEOID20'] ) for i,j in edges_to_add ]
    print("Their GEOIDs are =",added_edges_geoids)

    return edges_to_add


# For every state: attach lat/lon to the graph nodes, write that graph out,
# then make each county's node set connected, then the whole state, and write
# the connected graph out.
for state in state_codes.keys():

    print("Beginning state",state,"\n")

    # read county graph from file (used for county GEOIDs and names)
    countyfile = state + '_county.json'
    GC = Graph.from_json( filepath + countyfile )

    # read graph from file
    graphfile = state + '_' + level + '.json'
    G = Graph.from_json( filepath + graphfile )

    # read the shapefile
    shpfile = state + '_' + level + '.shp'
    df = gpd.read_file( filepath + shpfile )

    # add the lat/lon coordinates to graph (based on shapefile)
    #
    # Iterate over the shapefile's actual rows rather than indexing it with
    # range(len(G.nodes)): that only worked when the shapefile had a default
    # 0..n-1 index and exactly as many rows as the graph has nodes.
    geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }
    nodes_without_coords = set(G.nodes)
    for geoid, lat, lon in zip(df['GEOID20'], df['INTPTLAT20'], df['INTPTLON20']):
        i = geoid_to_node.get(geoid)
        if i is None:
            # shapefile row with no matching graph node: nothing to annotate
            continue
        G.nodes[i]['INTPTLAT20'] = float( lat )
        G.nodes[i]['INTPTLON20'] = float( lon )
        nodes_without_coords.discard(i)

    # Every node needs coordinates for the distance computations below, so
    # fail loudly here instead of with an obscure KeyError later (or not at all).
    if nodes_without_coords:
        raise ValueError(
            "No shapefile row found for " + str(len(nodes_without_coords))
            + " node(s) of " + graphfile
        )

    # write the lat/lon-adjusted graph to output1
    write_graph_to_json(G, outfilepath1 + graphfile)

    # add intra-county edges?
    # The first five characters of a node's GEOID20 are used as its county GEOID.
    nodes_in_county = { GC.nodes[c]['GEOID20'] : list() for c in GC.nodes }
    for i in G.nodes:
        geoid = G.nodes[i]['GEOID20'][0:5]
        nodes_in_county[geoid].append(i)

    for county in GC.nodes:
        geoid = GC.nodes[county]['GEOID20']
        nodes = nodes_in_county[geoid]
        G_county = G.subgraph(nodes)
        if not nx.is_connected(G_county):
            print("The nodes within",GC.nodes[county]['NAME20'],"County with geoid =",geoid,"are not connected!")
            print("Adding intra-county edges...")

            # if not, then connect it with least-cost edges
            components = list(nx.connected_components(G_county))
            _connect_components(G, components)

            # make sure G_county is now connected
            # NOTE(review): this re-check relies on G_county reflecting the
            # edges just added to G (i.e. on subgraph() giving a live view) - confirm.
            print("After adding these edges, is G_county now connected?",nx.is_connected(G_county))
            print("")

    # add inter-county edges?
    if nx.is_connected(G):

        print("Graph is connected; no need for more inter-county edges.")

    else:
        print("Adding inter-county edges...")
        # if not, then connect it with least-cost edges
        components = list(nx.connected_components(G))
        _connect_components(G, components)

        # make sure G is now connected
        print("After adding these edges, is G now connected?",nx.is_connected(G))

    print("")
    # write the lat/lon-adjusted *and connected* graph to output2
    write_graph_to_json(G, outfilepath2 + graphfile)

Beginning state WA 

Graph is connected; no need for more inter-county edges.

Beginning state DE 

Graph is connected; no need for more inter-county edges.

Beginning state WI 

Graph is connected; no need for more inter-county edges.

Beginning state WV 

Graph is connected; no need for more inter-county edges.

Beginning state HI 



  "Found islands (degree-0 nodes). Indices of islands: {}".format(islands)


The nodes within Kauai County with geoid = 15007 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 1331
Component 1 has this many nodes: 4
Component 2 has this many nodes: 9
Spanning tree edges in H are = [(0, 2), (1, 2)]
Adding these edges to G = [(6350, 2671), (2866, 2865)]
Their GEOIDs are = [('150079901000014', '150070412001005'), ('150079903000001', '150070412001001')]
After adding these edges, is G_county now connected? True

The nodes within Maui County with geoid = 15009 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 1597
Component 1 has this many nodes: 318
Component 2 has this many nodes: 132
Spanning tree edges in H are = [(0, 1), (1, 2)]
Adding these edges to G = [(2274, 8276), (3194, 12793)]
Their GEOIDs are = [('150099902000009', '150090317001014'), ('150099900000007', '150099912000001')]
After adding these edges, is G_county now connected? True

The nodes within Honolulu County with geoid = 15003 are not co

  "Found islands (degree-0 nodes). Indices of islands: {}".format(islands)


The nodes within New York County with geoid = 36061 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 4041
Component 1 has this many nodes: 1
Component 2 has this many nodes: 1
Spanning tree edges in H are = [(0, 1), (1, 2)]
Adding these edges to G = [(132757, 123738), (123738, 63261)]
Their GEOIDs are = [('360610319000001', '360610001001000'), ('360610001001000', '360610001001001')]
After adding these edges, is G_county now connected? True

Graph is connected; no need for more inter-county edges.

Beginning state PA 

The nodes within Chester County with geoid = 42029 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 9726
Component 1 has this many nodes: 2
Spanning tree edges in H are = [(0, 1)]
Adding these edges to G = [(72374, 89824)]
Their GEOIDs are = [('420293111002044', '420293031002021')]
After adding these edges, is G_county now connected? True

Graph is connected; no need for more inter-county edges.

Beginning st

Spanning tree edges in H are = [(0, 1)]
Adding these edges to G = [(3669, 24498)]
Their GEOIDs are = [('021989401003001', '021980003001002')]
After adding these edges, is G_county now connected? True

Graph is connected; no need for more inter-county edges.

Beginning state NV 

Graph is connected; no need for more inter-county edges.

Beginning state NH 

Graph is connected; no need for more inter-county edges.

Beginning state VA 

The nodes within Montgomery County with geoid = 51121 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 1831
Component 1 has this many nodes: 1
Spanning tree edges in H are = [(0, 1)]
Adding these edges to G = [(14186, 142056)]
Their GEOIDs are = [('511210215013007', '511210215013009')]
After adding these edges, is G_county now connected? True

The nodes within Bristol County with geoid = 51520 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 610
Component 1 has this many nodes: 1
Spanning tree

Graph is connected; no need for more inter-county edges.

Beginning state AR 

Graph is connected; no need for more inter-county edges.

Beginning state VT 

The nodes within Caledonia County with geoid = 50005 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 1891
Component 1 has this many nodes: 1
Spanning tree edges in H are = [(0, 1)]
Adding these edges to G = [(14996, 12512)]
Their GEOIDs are = [('500059578003074', '500059578003103')]
After adding these edges, is G_county now connected? True

Graph is connected; no need for more inter-county edges.

Beginning state IL 

Graph is connected; no need for more inter-county edges.

Beginning state GA 

The nodes within Lee County with geoid = 13177 are not connected!
Adding intra-county edges...
Component 0 has this many nodes: 598
Component 1 has this many nodes: 3
Spanning tree edges in H are = [(0, 1)]
Adding these edges to G = [(41946, 227629)]
Their GEOIDs are = [('131770203021053', '131770203021061'