# Connecting existing clusters of a bicycle network
## Project: Bicycle network analysis with Gourab, Sayat, Tyler, Michael, Roberta

This notebook takes a city's existing bicycle network, as prepared in 01_prepare_networks, then connects the biggest clusters following greedy triangulation. Code adapted from Tyler.

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2020-06-29  
Last modified: 2020-07-14

## Preliminaries

### Parameters

In [None]:
# Run-wide switches for this notebook.

# When True, the cells below produce plots and/or verbose output to double-check.
debug = True

# Pruning measure handed to the greedy triangulation routine.
prune_measure = "betweenness"

# Fraction of the total network length that the selected largest clusters
# have to cover; this decides how many clusters are considered.
cutoff = 0.5

### Setup

In [None]:
%run -i path.py
%run -i setup.py

%matplotlib inline
%load_ext watermark
%watermark -n -v -m -g -iv

### Functions

In [None]:
%run -i functions.py

## Exploratory Data Analysis

In [None]:
# Old way of constructing G_biketrackcarall

# # Merging biketrack and carall for routing between biketrack clusters
# node_biketrackcarall = pd.concat([node_biketrack, node_carall], ignore_index = True).drop_duplicates(subset = "osmid", ignore_index = True, keep = "first").reset_index()
# edge_biketrackcarall = pd.concat([edge_biketrack, edge_carall], ignore_index = True).drop_duplicates(subset = ["u","v","osmid"], ignore_index = True, keep = "first").reset_index()
# G_biketrackcarall = osm_to_ig(node_biketrackcarall, edge_biketrackcarall)

In [None]:
# edge_biketrackcarall.head()

### Cluster analysis

In [None]:
# For every city: load its networks, pick the largest bike track clusters that
# together cover at least `cutoff` of the total length, connect them via
# greedy triangulation, and pickle the outcome.
for placeid, placeinfo in cities.items():
    print(placeid + ": Generating cluster connections")
    placepath = PATH["data"] + placeid + "/"

    # Load networks
    G_biketrack = csv_to_ig(placepath, placeid, 'biketrack')
    G_carall = csv_to_ig(placepath, placeid, 'carall')
    G_biketrackcarall = csv_to_ig(placepath, placeid, 'biketrackcarall')
    G = copy.deepcopy(G_biketrack) # G is the bike graph we are working with

    # Connected components are computed once; G is not modified below, so
    # there is no need to recompute them on every access.
    components = list(G.components())
    total_length = sum(G.es["weight"])

    # Prepare clusterinfo
    # clusterinfo is keyed by a running counter over multi-node components
    # only. cluster_subgraphs uses the same keys, so a clusterinfo key always
    # resolves to its own component even when single-node components are
    # interleaved in `components`.
    clusters = []
    clusterinfo = {}
    cluster_subgraphs = {}
    i = 0
    for component in components:
        if len(component) > 1:
            subgraph = G.subgraph(component)
            clusterinfo[i] = {"size": subgraph.vcount(),
                              # first element of the helper's result is used as the centroid's id
                              "centroid_id": highest_closeness_node(subgraph)[0],
                              "length": sum(subgraph.es["weight"])
                              }
            clusterinfo[i]["centroid_index"] = G.vs.find(id = clusterinfo[i]['centroid_id']).index
            cluster_subgraphs[i] = subgraph
            i += 1

    cluster_indices = clusterindices_by_length(clusterinfo)

    # Walk through the clusters in the order given by clusterindices_by_length
    # and keep them until the covered length reaches the cutoff fraction.
    # The kept clusters are re-keyed 0, 1, 2, ... in clusterinfo_temp.
    clusterinfo_temp = {}
    length_covered = 0
    for c in cluster_indices:
        clusterinfo_temp[len(clusters)] = clusterinfo[c]
        clusters.append(cluster_subgraphs[c])
        length_covered += clusterinfo[c]["length"]
        if length_covered >= cutoff*total_length:
            break

    # len(clusters) is the number actually kept, whether or not the loop
    # above ended through the break.
    print('{:d}'.format(len(clusters)) + " largest clusters of " + '{:d}'.format(len(components)) + " considered. Length covered: " + '{:.2f}'.format(length_covered) + " km (" + '{:.0f}'.format(100*length_covered/total_length) + "% of total length)")

    clusterinfo = copy.deepcopy(clusterinfo_temp)

    cluster_indices = clusterindices_by_length(clusterinfo)

    # Generation
    GTs, GT_abstracts = greedy_triangulation_routing_clusters(G, G_biketrackcarall, clusters, clusterinfo, prune_quantiles, prune_measure)

    # NOTE(review): presumably strips from each GT what already exists in the
    # bike track network - confirm against delete_overlaps in functions.py
    for GT in GTs:
        delete_overlaps(GT, G_biketrack)

    # Write results
    results = {"placeid": placeid, "prune_measure": prune_measure, "prune_quantiles": prune_quantiles, "GTs": GTs, "GT_abstracts": GT_abstracts}

    filename = placeid + '_clusters_' + prune_measure + "_cutoff" + "{:.2f}".format(cutoff)
    # The context manager closes the file even if pickling raises
    with open(PATH["results"] + filename + ".pickle", 'wb') as resultfile:
        pickle.dump(results, resultfile)

In [None]:
if debug:
    pp.pprint(clusterinfo)

    plt.figure(figsize=[2*6.4, 2*4.8])
    for i in range(len(clusters)):
        plt.plot(clusters[i].vs['x'],clusters[i].vs['y'],'.')
    plt.gca().invert_yaxis()
    %config InlineBackend.figure_format = 'retina'

In [None]:
# # For testing
# # Get the pairs of distances between all clusters, where distance is routing distance of the closest nodes
# clusterpairs = clusterpairs_by_distance(G, G_biketrackcarall, clusters, clusterinfo, True, False, False)
# clusterpairs

In [None]:
# # For testing
# temp_list = set()
# for x in clusterpairs:
#     temp_list.add(x[1][0])
#     temp_list.add(x[1][1])
# temp_list = list(temp_list)
# temp_list
# my_plot_reset(G_biketrack, temp_list)
# ig.plot(G_biketrack)

In [None]:
if debug:
    # Save one image of the abstract triangulation per pruning quantile
    for prune_quantile, GT_abstract in zip(prune_quantiles, GT_abstracts):
        plot_name = (placeid + '_GTclustersabstract_biketrack_cutoff' + "{:.2f}".format(cutoff)
                     + "_" + prune_measures[prune_measure] + "{:.2f}".format(prune_quantile) + '.png')
        ig.plot(GT_abstract, PATH["plots"] + plot_name, bbox=(800,800))

### Plot just the clusters and their new connections

In [None]:
if debug:
    plt.figure(figsize=[2*6.4, 2*4.8])
    plt.plot(GTs[-1].vs['x'], GTs[-1].vs['y'], 'o', color='red', markersize=5)
    for i in range(len(clusters)):
        plt.plot(clusters[i].vs['x'], clusters[i].vs['y'], 'o', markersize=3)
    plt.gca().invert_yaxis()
    %config InlineBackend.figure_format = 'retina'

### Plot just the new connections

In [None]:
if debug:
    # Style the last triangulation in GTs (node size 0, edge width 2,
    # blue edges) using the project helpers, then plot it
    last_GT = GTs[-1]
    size_nodes(last_GT, 0)
    width_edges(last_GT, 2)
    color_edges(last_GT, "blue")
    ig.plot(last_GT)

### Plot the whole bike network and its new connections

In [None]:
if debug:
    for GT, prune_quantile in zip(GTs, prune_quantiles):
        plt.figure(figsize=[2*6.4, 2*4.8])
        plt.plot(GT.vs['x'], GT.vs['y'], 'o', color='red', markersize=3)
        plt.plot(G_biketrack.vs['x'], G_biketrack.vs['y'], 'o', color='black', markersize=1)
        plt.gca().invert_yaxis()
        %config InlineBackend.figure_format = 'retina'
        plt.savefig(PATH["plots"] + placeid + '_GTclusters_biketrack_cutoff' + "{:.2f}".format(cutoff) + "_" + prune_measures[prune_measure] + "{:.2f}".format(prune_quantile) + '.png')