# Connecting existing clusters of a bicycle network
## Project: Bicycle network analysis with Gourab, Sayat, Tyler, Michael, Roberta

This notebook takes a city's existing bicycle network, as prepared in 01_prepare_networks, then connects the biggest clusters following greedy triangulation. Code adapted from Tyler.

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2020-06-29  
Last modified: 2020-07-16

## Preliminaries

### Parameters

In [14]:
debug = False # If True, will produce plots and/or verbose output to double-check

# Pruning measure; handed to greedy_triangulation_routing_clusters and used in result/plot file names
prune_measure = "betweenness"

# cutofftype "abs" or "rel"
# Case rel: cutoff (0-1) is fraction threshold of total length
# Case abs: cutoff (in meters) is minimal length of cluster to be considered
cutofftype = "rel"
cutoff = 0.5

# Fail early on invalid settings. The cluster selection loop only tests for
# "abs" and "rel", so any other value would silently select every cluster.
if cutofftype not in ("abs", "rel"):
    raise ValueError('cutofftype must be "abs" or "rel", got ' + repr(cutofftype))
if cutofftype == "rel" and not 0 <= cutoff <= 1:
    raise ValueError('cutoff must lie in [0, 1] for cutofftype "rel", got ' + repr(cutoff))
if cutofftype == "abs" and cutoff < 0:
    raise ValueError('cutoff must be non-negative for cutofftype "abs", got ' + repr(cutoff))

### Setup

In [15]:
# Execute the project scripts inside this notebook's namespace (-i), so the
# names they define stay available to later cells. Later cells rely on names
# that are not defined in this notebook (e.g. PATH, cities, prune_quantiles,
# prune_measures, copy, pickle, plt, ig, pp) -- presumably provided by these
# scripts; verify against path.py / setup.py.
%run -i path.py
%run -i setup.py

# Render matplotlib figures inline, then record the execution environment:
# date (-n), Python/IPython versions (-v), machine info (-m), git hash (-g)
# and versions of imported packages (-iv).
%matplotlib inline
%load_ext watermark
%watermark -n -v -m -g -iv

Loaded PATH


=== Cities ===
{'budapest': {'countryid': 'hun', 'nominatimstring': 'Budapest, Hungary'}}


Setup finished

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
pandas    1.0.3
geopandas 0.7.0
fiona     1.8.13
osmnx     0.13.0
shapely   1.7.0
networkx  2.4
pyproj    2.6.1.post1
watermark 2.0.2
csv       1.0
igraph    0.8.2
numpy     1.18.4
Wed Jul 15 2020 

CPython 3.8.2
IPython 7.14.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.5.0
machine    : x86_64
processor  : i386
CPU cores  : 12
interpreter: 64bit
Git hash   :


### Functions

In [18]:
# Execute functions.py inside this notebook's namespace (-i). The helpers
# called below (csv_to_ig, highest_closeness_node, clusterindices_by_length,
# greedy_triangulation_routing_clusters, delete_overlaps) are not defined in
# this notebook -- presumably they come from this script; verify.
%run -i functions.py

Loaded functions


## Cluster Analysis

In [17]:
# For every city: load its networks, pick the largest connected clusters of
# the bike track network (up to the configured cutoff), connect them via
# greedy triangulation routing, and pickle the results.
for placeid, placeinfo in cities.items():
    print(placeid + ": Generating cluster connections")
    placepath = PATH["data"] + placeid + "/"

    # Load networks
    G_biketrack = csv_to_ig(placepath, placeid, 'biketrack')
    G_carall = csv_to_ig(placepath, placeid, 'carall') # not used in this cell; kept in case other code relies on it
    G_biketrackcarall = csv_to_ig(placepath, placeid, 'biketrackcarall')
    G = copy.deepcopy(G_biketrack) # G is the bike graph we are working with

    # Connected components are computed once and reused below
    components = list(G.components())

    # Prepare clusterinfo
    # clusterinfo is keyed by a running counter i that skips single-vertex
    # components, so it is NOT a valid index into `components`.
    # component_of_cluster maps each clusterinfo key back to its component.
    clusters = []
    clusterinfo = {}
    component_of_cluster = {}
    i = 0
    total_length = sum(G.es["weight"])
    for j, component in enumerate(components):
        if len(component) > 1:
            cluster_graph = G.subgraph(component)
            cluster_size = cluster_graph.vcount()
            cluster_length = sum(cluster_graph.es["weight"])
            clusterinfo[i] = {"size": cluster_size,
                              "centroid_id": highest_closeness_node(cluster_graph)[0],
                              "length": cluster_length
                              }
            clusterinfo[i]["centroid_index"] = G.vs.find(id = clusterinfo[i]['centroid_id']).index
            component_of_cluster[i] = j
            i += 1

    cluster_indices = clusterindices_by_length(clusterinfo)

    # Select clusters in the order given by cluster_indices until the cutoff is hit
    clusterinfo_temp = {}
    length_covered = 0

    numclusters = 0
    for c in cluster_indices:
        if cutofftype == "abs" and clusterinfo[c]["length"] < cutoff:
            break
        # Look the component up through the mapping: c is a clusterinfo key,
        # which differs from the component index when single-vertex components exist.
        clusters.append(G.subgraph(components[component_of_cluster[c]]))
        clusterinfo_temp[numclusters] = clusterinfo[c]
        length_covered += clusterinfo[c]["length"]
        numclusters += 1
        if cutofftype == "rel" and length_covered >= cutoff*total_length:
            break

    # Guard against a network without any edge length
    percent_covered = 100*length_covered/total_length if total_length else 0
    # NOTE(review): the message labels lengths as km, while the cutoff parameter is documented in meters -- confirm the unit of the edge weights
    print('{:d}'.format(numclusters) + " largest clusters of " + '{:d}'.format(len(components)) + " considered. Length covered: " + '{:.2f}'.format(length_covered) + " km (" + '{:.0f}'.format(percent_covered) + "% of total length)")

    if numclusters > 1: # Only run this if we have at least 2 clusters to connect
        # From here on clusterinfo is keyed 0..numclusters-1, aligned with the clusters list
        clusterinfo = copy.deepcopy(clusterinfo_temp)
        cluster_indices = clusterindices_by_length(clusterinfo)

        # Generation
        GTs, GT_abstracts = greedy_triangulation_routing_clusters(G, G_biketrackcarall, clusters, clusterinfo, prune_quantiles, prune_measure)

        for GT in GTs:
            delete_overlaps(GT, G_biketrack)

        # Write results
        results = {"placeid": placeid, "prune_measure": prune_measure, "prune_quantiles": prune_quantiles, "GTs": GTs, "GT_abstracts": GT_abstracts, "clusters": clusters, "clusterinfo": clusterinfo, "cutoff": cutoff, "cutofftype": cutofftype}

        filename = placeid + '_clusters_' + prune_measure + "_cutoff" + cutofftype + "{:.2f}".format(cutoff) + ".pickle"
        # Context manager closes the file even if pickling fails
        with open(PATH["results"] + filename, 'wb') as resultfile:
            pickle.dump(results, resultfile)
    else:
        print("Stopped execution due to insufficient number of clusters.")

budapest: Generating cluster connections


  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


12 largest clusters of 192 considered. Length covered: 136.42 km (51% of total length)
Working on cluster 1 of 12...


  sp = G_total.get_shortest_paths(a, c2_indices, weights = "weight", output = "epath")


Working on cluster 2 of 12...
Working on cluster 3 of 12...
Working on cluster 4 of 12...
Working on cluster 5 of 12...
Working on cluster 6 of 12...
Working on cluster 7 of 12...
Working on cluster 8 of 12...
Working on cluster 9 of 12...
Working on cluster 10 of 12...
Working on cluster 11 of 12...


In [None]:
if debug:
    pp.pprint(clusterinfo)

    plt.figure(figsize=[2*6.4, 2*4.8])
    for i in range(len(clusters)):
        plt.plot(clusters[i].vs['x'],clusters[i].vs['y'],'.')
    plt.gca().invert_yaxis()
    %config InlineBackend.figure_format = 'retina'

In [None]:
# # For testing
# # Get the pairs of distances between all clusters, where distance is routing distance of the closest nodes
# clusterpairs = clusterpairs_by_distance(G, G_biketrackcarall, clusters, clusterinfo, True, False, False)
# clusterpairs

In [None]:
# # For testing
# temp_list = set()
# for x in clusterpairs:
#     temp_list.add(x[1][0])
#     temp_list.add(x[1][1])
# temp_list = list(temp_list)
# temp_list
# my_plot_reset(G_biketrack, temp_list)
# ig.plot(G_biketrack)

In [None]:
if debug:
    # One PNG per abstract greedy-triangulation graph, paired with its prune quantile
    for GT_abstract, prune_quantile in zip(GT_abstracts, prune_quantiles):
        cutoff_tag = cutofftype + "{:.2f}".format(cutoff)
        prune_tag = prune_measures[prune_measure] + "{:.2f}".format(prune_quantile)
        abstract_plot_file = PATH["plots"] + placeid + '_GTclustersabstract_biketrack_cutoff' + cutoff_tag + "_" + prune_tag + '.png'
        ig.plot(GT_abstract, abstract_plot_file, bbox=(800,800))

### Plot the whole bike network and its new connections

In [None]:
if debug:
    for GT, prune_quantile in zip(GTs, prune_quantiles):
        plt.figure(figsize=[2*6.4, 2*4.8])
        plt.plot(GT.vs['x'], GT.vs['y'], 'o', color='red', markersize=3)
        plt.plot(G_biketrack.vs['x'], G_biketrack.vs['y'], 'o', color='black', markersize=1)
        plt.gca().invert_yaxis()
        %config InlineBackend.figure_format = 'retina'
        plt.savefig(PATH["plots"] + placeid + '_GTclusters_biketrack_cutoff' + cutofftype + "{:.2f}".format(cutoff) + "_" + prune_measures[prune_measure] + "{:.2f}".format(prune_quantile) + '.png')