# Step 4 - Connecting existing clusters of a bicycle network
## Project: Algorithmic bicycle network design
#### Michael Szell, Tyler Perlman, Sayat Mimar, Gourab Ghoshal, Roberta Sinatra

This notebook takes a city's existing bicycle network, as prepared in 01_prepare_networks, then connects the biggest clusters following greedy triangulation.

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2020-06-29  
Last modified: 2020-07-21

## Preliminaries

### Parameters

In [None]:
# Diagnostics switch. It is not read by any cell visible in this notebook, but the
# scripts executed with `%run -i` share this namespace and can see it.
debug = False # If True, will produce plots and/or verbose output to double-check
# `-i` executes the script inside this notebook's namespace, so the names it defines
# stay available to the cells below.
# NOTE(review): presumably this is where cities, cutoff, cutofftype, prune_measure and
# prune_quantiles come from -- confirm in parameters.py.
%run -i "../parameters/parameters.py"

### Setup

In [None]:
# Execute the project's path and setup scripts inside this notebook's namespace (-i).
# NOTE(review): PATH, copy, pickle, Audio and sound_file are used further down but never
# defined in this notebook; presumably they come from these two scripts -- confirm.
%run -i path.py
%run -i setup.py

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Record the environment of this run: date (-n), Python version (-v), machine info (-m),
# git hash (-g) and versions of the imported packages (-iv).
%load_ext watermark
%watermark -n -v -m -g -iv

### Functions

In [None]:
# Execute functions.py inside this notebook's namespace (-i).
# NOTE(review): the helpers called below (csv_to_ig, highest_closeness_node,
# clusterindices_by_length, greedy_triangulation_routing_clusters, delete_overlaps) are
# not defined in this notebook and presumably come from this script -- confirm.
%run -i functions.py

## Cluster Analysis

In [None]:
# For every city: load its networks, pick the longest connected clusters of the existing
# bicycle network up to the configured cutoff, connect them via greedy triangulation, and
# pickle the result.
for placeid, placeinfo in cities.items():
    print(placeid + ": Generating cluster connections")

    # Load networks
    G_biketrack = csv_to_ig(PATH["data"] + placeid + "/", placeid, 'biketrack')
    G_carall = csv_to_ig(PATH["data"] + placeid + "/", placeid, 'carall') # not used further in this cell
    G_biketrackcarall = csv_to_ig(PATH["data"] + placeid + "/", placeid, 'biketrackcarall')
    G = copy.deepcopy(G_biketrack) # G is the bike graph we are working with

    # Prepare clusterinfo
    # The connected components are computed once and reused everywhere below.
    components = list(G.components())
    total_length = sum(G.es["weight"])
    clusters = []
    clusterinfo = {}
    # clusterinfo is keyed by a running counter that skips single-vertex components, so
    # its keys are NOT positions in `components`. Remember each cluster's vertex list
    # under the same key so the matching subgraph can be rebuilt later.
    cluster_members = {}
    i = 0
    for component in components:
        if len(component) > 1:
            cluster = G.subgraph(component)
            clusterinfo[i] = {"size": cluster.vcount(),
                              "centroid_id": highest_closeness_node(cluster)[0],
                              "length": sum(cluster.es["weight"])
                              }
            # Index of the centroid vertex in the full bike graph G (looked up by its id attribute)
            clusterinfo[i]["centroid_index"] = G.vs.find(id = clusterinfo[i]['centroid_id']).index
            cluster_members[i] = component
            i += 1

    cluster_indices = clusterindices_by_length(clusterinfo)

    # Select clusters in the order given by clusterindices_by_length until the cutoff is hit
    clusterinfo_temp = {}
    length_covered = 0
    numclusters = 0
    for c in cluster_indices:
        # "abs": stop at the first cluster shorter than cutoff/1000
        # NOTE(review): presumably cutoff is in meters and lengths are in km -- confirm.
        if cutofftype == "abs" and clusterinfo[c]["length"] < cutoff/1000:
            break
        # Build the subgraph from the vertices stored under key c; using c as a position
        # in `components` would pick the wrong component whenever single-vertex
        # components were skipped above.
        clusters.append(G.subgraph(cluster_members[c]))
        clusterinfo_temp[numclusters] = clusterinfo[c]
        length_covered += clusterinfo[c]["length"]
        numclusters += 1
        # "rel": stop once the selected clusters cover the fraction `cutoff` of the total length
        if cutofftype == "rel" and length_covered >= cutoff*total_length:
            break

    # Avoid a ZeroDivisionError in the progress message when the bike network has no length
    percent_covered = 100*length_covered/total_length if total_length else 0
    print('{:d}'.format(numclusters) + " largest clusters of " + '{:d}'.format(len(components)) + " considered. Length covered: " + '{:.2f}'.format(length_covered) + " km (" + '{:.0f}'.format(percent_covered) + "% of total length)")

    # From here on clusterinfo is re-keyed 0..numclusters-1, aligned with the `clusters` list
    clusterinfo = copy.deepcopy(clusterinfo_temp)
    cluster_indices = clusterindices_by_length(clusterinfo)

    # Generation
    GTs, GT_abstracts = greedy_triangulation_routing_clusters(G, G_biketrackcarall, clusters, clusterinfo, prune_quantiles, prune_measure)

    for GT in GTs:
        delete_overlaps(GT, G_biketrack)

    # Write results
    results = {"placeid": placeid, "prune_measure": prune_measure, "prune_quantiles": prune_quantiles, "GTs": GTs, "GT_abstracts": GT_abstracts, "clusters": clusters, "clusterinfo": clusterinfo, "cutoff": cutoff, "cutofftype": cutofftype}

    filename = placeid + '_clusters_' + prune_measure + "_cutoff" + cutofftype + "{:.2f}".format(cutoff) + ".pickle"
    with open(PATH["results"] + placeid + "/" + filename, 'wb') as f:
        pickle.dump(results, f)

In [None]:
# Autoplay an audio clip when this cell is displayed, i.e. after the loop above has finished.
# NOTE(review): Audio and sound_file are not defined in this notebook; presumably
# IPython.display.Audio and a path set by the setup scripts -- confirm.
Audio(sound_file, autoplay=True)