# Step 4 - Connecting existing clusters of a bicycle network
## Project: Algorithmic bicycle network design
#### Michael Szell, Tyler Perlman, Sayat Mimar, Gourab Ghoshal, Roberta Sinatra

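This notebook takes a city's existing bicycle network, as prepared in 01_prepare_networks, ranks its connected clusters by length, selects the largest ones according to the `cutoff`/`cutofftype` parameters, and then connects them following greedy triangulation.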

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2020-06-29  
Last modified: 2020-07-17

## Preliminaries

### Parameters

In [1]:
# Switch for the optional diagnostic cells below (those guarded by `if debug:`).
debug = False # If True, will produce plots and/or verbose output to double-check
# Run the shared parameter script inside this notebook's namespace (-i), so the
# names it defines become notebook globals.
# NOTE(review): cities, cutoff, cutofftype, prune_measure(s) and prune_quantiles
# are presumably defined there (they are used below but not defined in this notebook) - confirm.
%run -i "../parameters/parameters.py"

Loaded parameters.



### Setup

In [2]:
# Run the path and setup scripts in the notebook namespace (-i).
# NOTE(review): PATH and the library aliases used below (copy, pickle, ig, plt, pp)
# are presumably provided by these two scripts - confirm.
%run -i path.py
%run -i setup.py

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Record the execution environment for reproducibility:
# -n date, -v Python/IPython versions, -m machine info, -g git hash, -iv versions of imported packages.
%load_ext watermark
%watermark -n -v -m -g -iv

Loaded PATH
Setup finished.

osmnx     0.13.0
igraph    0.8.2
pandas    1.0.3
networkx  2.4
geopandas 0.7.0
fiona     1.8.13
csv       1.0
pyproj    2.6.1.post1
watermark 2.0.2
numpy     1.18.4
shapely   1.7.0
Fri Jul 17 2020 

CPython 3.8.2
IPython 7.14.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.5.0
machine    : x86_64
processor  : i386
CPU cores  : 12
interpreter: 64bit
Git hash   :


### Functions

In [3]:
# Run the project's function definitions in the notebook namespace (-i).
# NOTE(review): csv_to_ig, highest_closeness_node, clusterindices_by_length,
# greedy_triangulation_routing_clusters and delete_overlaps are presumably defined here - confirm.
%run -i functions.py

Loaded functions


## Cluster Analysis

In [4]:
# For every city: load its networks, rank the connected components ("clusters") of the
# bike track network by length, keep the largest ones according to cutoff/cutofftype,
# connect them via greedy triangulation routing, and pickle the results.
for placeid, placeinfo in cities.items():
    print(placeid + ": Generating cluster connections")

    # Load networks
    G_biketrack = csv_to_ig(PATH["data"] + placeid + "/", placeid, 'biketrack')
    # NOTE(review): G_carall is not used in this cell; kept because other cells/notebook
    # state may rely on it - confirm before removing.
    G_carall = csv_to_ig(PATH["data"] + placeid + "/", placeid, 'carall')
    G_biketrackcarall = csv_to_ig(PATH["data"] + placeid + "/", placeid, 'biketrackcarall')
    G = copy.deepcopy(G_biketrack) # G is the bike graph we are working with

    # Compute the connected components ONCE. The component structure does not change
    # inside this cell, so recomputing it on every access is wasted work.
    components = list(G.components())
    total_length = sum(G.es["weight"])

    # Prepare clusterinfo
    # clusterinfo is keyed by a running index i that skips single-node components.
    # cluster_vertices keeps the vertex list for the same key, so that the selection
    # loop below does not have to assume that i equals the position in `components`
    # (which is false as soon as a single-node component has been skipped).
    clusterinfo = {}
    cluster_vertices = {}
    i = 0
    for component in components:
        if len(component) > 1:
            component_graph = G.subgraph(component)
            clusterinfo[i] = {"size": component_graph.vcount(),
                              "centroid_id": highest_closeness_node(component_graph)[0],
                              "length": sum(component_graph.es["weight"])
                              }
            # Index (in G) of the vertex whose "id" attribute equals the centroid id
            clusterinfo[i]["centroid_index"] = G.vs.find(id = clusterinfo[i]['centroid_id']).index
            cluster_vertices[i] = component
            i += 1

    # NOTE(review): presumably returns the clusterinfo keys ordered by decreasing
    # length (the early `break`s below rely on that ordering) - confirm in functions.py.
    cluster_indices = clusterindices_by_length(clusterinfo)

    # Select clusters in that order until the cutoff criterion is reached
    clusters = []
    clusterinfo_temp = {}
    length_covered = 0
    numclusters = 0
    for c in cluster_indices:
        # Absolute cutoff: stop at the first cluster shorter than cutoff/1000.
        # NOTE(review): the /1000 presumably converts metres to km (lengths are printed as km) - confirm.
        if cutofftype == "abs" and clusterinfo[c]["length"] < cutoff/1000:
            break
        clusters.append(G.subgraph(cluster_vertices[c]))
        clusterinfo_temp[numclusters] = clusterinfo[c]
        length_covered += clusterinfo[c]["length"]
        numclusters += 1
        # Relative cutoff: stop once the selected clusters cover the requested share of total length
        if cutofftype == "rel" and length_covered >= cutoff*total_length:
            break

    # Guard against a network without any edge length to avoid a ZeroDivisionError in the report
    percent_covered = 100*length_covered/total_length if total_length > 0 else 0
    print('{:d}'.format(numclusters) + " largest clusters of " + '{:d}'.format(len(components)) + " considered. Length covered: " + '{:.2f}'.format(length_covered) + " km (" + '{:.0f}'.format(percent_covered) + "% of total length)")

    if numclusters > 1: # Only run this if we have at least 2 clusters to connect
        # From here on clusterinfo is re-keyed 0..numclusters-1, aligned with `clusters`
        clusterinfo = copy.deepcopy(clusterinfo_temp)
        cluster_indices = clusterindices_by_length(clusterinfo)

        # Generation
        GTs, GT_abstracts = greedy_triangulation_routing_clusters(G, G_biketrackcarall, clusters, clusterinfo, prune_quantiles, prune_measure)

        for GT in GTs:
            delete_overlaps(GT, G_biketrack)

        # Write results
        results = {"placeid": placeid, "prune_measure": prune_measure, "prune_quantiles": prune_quantiles, "GTs": GTs, "GT_abstracts": GT_abstracts, "clusters": clusters, "clusterinfo": clusterinfo, "cutoff": cutoff, "cutofftype": cutofftype}

        filename = placeid + '_clusters_' + prune_measure + "_cutoff" + cutofftype + "{:.2f}".format(cutoff) + ".pickle"
        # Context manager guarantees the file is closed even if pickling fails
        with open(PATH["results"] + placeid + "/" + filename, 'wb') as resultfile:
            pickle.dump(results, resultfile)
    else:
        print("Stopped execution due to insufficient number of clusters.")

vienna: Generating cluster connections


  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


30 largest clusters of 476 considered. Length covered: 581.09 km (81% of total length)
Working on cluster 1 of 30...


  sp = G_total.get_shortest_paths(a, c2_indices, weights = "weight", output = "epath")


Working on cluster 2 of 30...
Working on cluster 3 of 30...
Working on cluster 4 of 30...
Working on cluster 5 of 30...
Working on cluster 6 of 30...
Working on cluster 7 of 30...
Working on cluster 8 of 30...
Working on cluster 9 of 30...
Working on cluster 10 of 30...
Working on cluster 11 of 30...
Working on cluster 12 of 30...
Working on cluster 13 of 30...
Working on cluster 14 of 30...
Working on cluster 15 of 30...
Working on cluster 16 of 30...
Working on cluster 17 of 30...


  sp = G_total.get_shortest_paths(b, c1_indices, weights = "weight", output = "epath")


Working on cluster 18 of 30...
Working on cluster 19 of 30...
Working on cluster 20 of 30...
Working on cluster 21 of 30...
Working on cluster 22 of 30...
Working on cluster 23 of 30...
Working on cluster 24 of 30...
Working on cluster 25 of 30...
Working on cluster 26 of 30...
Working on cluster 27 of 30...
Working on cluster 28 of 30...
Working on cluster 29 of 30...


In [5]:
if debug:
    pp.pprint(clusterinfo)

    plt.figure(figsize=[2*6.4, 2*4.8])
    for i in range(len(clusters)):
        plt.plot(clusters[i].vs['x'],clusters[i].vs['y'],'.')
    plt.gca().invert_yaxis()
    %config InlineBackend.figure_format = 'retina'

In [6]:
# Disabled testing cell (all code is commented out; uncomment to use).
# # For testing
# # Get the pairs of distances between all clusters, where distance is routing distance of the closest nodes
# clusterpairs = clusterpairs_by_distance(G, G_biketrackcarall, clusters, clusterinfo, True, False, False)
# clusterpairs

In [7]:
# Disabled testing cell (all code is commented out; uncomment to use).
# It reads `clusterpairs`, which is only assigned in the commented-out
# clusterpairs_by_distance(...) testing cell, so that cell must be enabled and run first.
# # For testing
# temp_list = set()
# for x in clusterpairs:
#     temp_list.add(x[1][0])
#     temp_list.add(x[1][1])
# temp_list = list(temp_list)
# temp_list
# my_plot_reset(G_biketrack, temp_list)
# ig.plot(G_biketrack)

In [8]:
if debug:
    # Save one PNG per abstract triangulation graph, pairing each graph positionally
    # with its pruning quantile. Uses placeid/GT_abstracts as left by the last
    # iteration of the city loop above.
    for prune_quantile, GT_abstract in zip(prune_quantiles, GT_abstracts):
        plot_target = (PATH["plots"] + placeid + '_GTclustersabstract_biketrack_cutoff' + cutofftype
                       + "{:.2f}".format(cutoff) + "_" + prune_measures[prune_measure]
                       + "{:.3f}".format(prune_quantile) + '.png')
        ig.plot(GT_abstract, plot_target, bbox=(800,800))

### Plot the whole bike network and its new connections

In [9]:
if debug:
    for GT, prune_quantile in zip(GTs, prune_quantiles):
        plt.figure(figsize=[2*6.4, 2*4.8])
        plt.plot(GT.vs['x'], GT.vs['y'], 'o', color='red', markersize=3)
        plt.plot(G_biketrack.vs['x'], G_biketrack.vs['y'], 'o', color='black', markersize=1)
        plt.gca().invert_yaxis()
        %config InlineBackend.figure_format = 'retina'
        plt.savefig(PATH["plots"] + placeid + '_GTclusters_biketrack_cutoff' + cutofftype + "{:.2f}".format(cutoff) + "_" + prune_measures[prune_measure] + "{:.3f}".format(prune_quantile) + '.png')