# Step 8 - Exploratory analysis
## Project: Algorithmic bicycle network growth

This notebook is a sandbox for exploring results.

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2021-02-08  
Last modified: 2021-04-23

## Preliminaries

### Parameters

In [None]:
# Debug switch read by later cells of this notebook (it guards the extra diagnostic
# figure in the "Comparing grown with existing metrics" cell).
debug = False # If True, will produce plots and/or verbose output to double-check
# Execute the shared parameter script inside this notebook's namespace (-i), so that the
# names it defines become notebook globals.
# NOTE(review): which names it provides is not visible from this notebook — confirm in parameters.py.
%run -i "../parameters/parameters.py"

### Setup

In [None]:
# Execute the project's path and setup scripts inside this notebook's namespace (-i).
# NOTE(review): presumably these provide PATH, cities and the np / plt / tqdm / itertools /
# matplotlib names that later cells use without importing them — confirm in the scripts.
%run -i path.py
%run -i setup.py

# Load the watermark extension and print environment information for reproducibility
# (see the watermark documentation for the meaning of the individual flags).
%load_ext watermark
%watermark -n -v -m -g -iv

### Functions

In [None]:
# Execute functions.py inside this notebook's namespace (-i) so that whatever it defines
# is available to the cells below.
# NOTE(review): presumably the project's shared helper functions — confirm in functions.py.
%run -i functions.py

### Constants

In [None]:
# POI types and growth measures that were analysed
pois = ["grid", "railwaystation"]
measures = ["betweenness", "closeness"]
# Every (POI type, measure) pair, POI type varying slowest; one figure panel is drawn per pair
combs = [(poi, measure) for poi in pois for measure in measures]
print(combs)

In [None]:
# Total number of cities; used as the denominator in the "n/N cities found" printouts below
numcitiestotal = len(cities)

### Load all results

In [None]:
# Load the analysis CSV of every city for each (POI type, measure) combination into
# analysis_result[poi][measure][placeid], as a structured array whose field names come
# from the CSV header (names=True).
analysis_result = {}
for p in pois:
    analysis_result[p] = {}
    for m in measures:
        analysis_result[p][m] = {}
        
        for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
            filename = placeid + '_poi_' + p + "_" + m + ".csv"
            analysis_result[p][m][placeid] = np.genfromtxt(PATH["results"] + placeid + "/" + filename, delimiter=',', names=True)
            # .size instead of len(): also works if the loaded array is 0-dimensional
            if analysis_result[p][m][placeid].size == 0:
                # No data for this city: substitute a placeholder that has the layout of the
                # first city's results, with every field set to -1.
                # The placeholder must be a COPY. Assigning the first city's array directly
                # would alias it, and the -1 fill below would then overwrite that city's
                # real results as well.
                placeholder = analysis_result[p][m][list(cities.keys())[0]].copy()
                for n in placeholder.dtype.names:
                    placeholder[n] = -1
                analysis_result[p][m][placeid] = placeholder

In [None]:
# Load each city's "<placeid>_existing.csv" into analysis_existing[placeid] as a structured
# array (field names from the CSV header). Only columns 1..11 are read; column 0 is skipped.
analysis_existing = {}
existing_usecols = tuple(range(1, 12))
for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
    filename = f"{placeid}_existing.csv"
    csv_path = PATH["results"] + placeid + "/" + filename
    analysis_existing[placeid] = np.genfromtxt(
        csv_path,
        delimiter=',',
        names=True,
        usecols=existing_usecols,
    )

## Directness and Efficiency for all cities

In this section, we want to check the hypothesis that there is a "dip" in the metrics of directness and efficiency, i.e. that there is a U-shaped form where the metric starts at a high value, then falls (due to percolation / emergence of the giant component), and then grows back to an intermediate value. We do that for each combination of POI type and growth measure: [('grid', 'betweenness'), ('grid', 'closeness'), ('railwaystation', 'betweenness'), ('railwaystation', 'closeness')].

To do that, we first select all cities where the minimum value of the metric lies to the right of the maximum value, i.e. where the minimum is reached after the maximum. For those cities we then plot only the (x,y) pairs of the min, max, and end values.

### Directness min/max/end

In [None]:
# For every (POI type, measure) combination, collect three landmark points of each city's
# "directness_lcc" curve: its minimum, its maximum and its final value.
#   ["y"][...] holds the metric values,
#   ["x"][...] holds the row index at which that value occurs (the last one if repeated).
# All lists are ordered like cities.keys().
directness_lcc = {}
for p in pois:
    directness_lcc[p] = {}
    for m in measures:
        curves = [analysis_result[p][m][placeid]["directness_lcc"] for placeid in cities.keys()]
        lows = [min(curve) for curve in curves]
        highs = [max(curve) for curve in curves]
        finals = [curve[-1] for curve in curves]

        directness_lcc[p][m] = {
            "x": {
                "min": [np.where(curve == low)[0][-1] for curve, low in zip(curves, lows)],
                "max": [np.where(curve == high)[0][-1] for curve, high in zip(curves, highs)],
                "end": [np.where(curve == final)[0][-1] for curve, final in zip(curves, finals)],
            },
            "y": {"min": lows, "max": highs, "end": finals},
        }

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8), squeeze=True)
axes = axes.flatten()

# One panel per (POI type, measure) combination, in the order of `combs`
for i, ax in enumerate(axes):
    poi, measure = combs[i]
    points = directness_lcc[poi][measure]
    x_min, x_max, x_end = (np.asarray(points["x"][key]) for key in ("min", "max", "end"))
    y_min, y_max, y_end = (np.asarray(points["y"][key]) for key in ("min", "max", "end"))

    # Keep only the cities whose minimum is reached after their maximum
    dipping = np.where(x_min > x_max)[0]
    print(f"{len(dipping)}/{numcitiestotal} cities found with x_min>x_max for {combs[i]}")

    # Dotted connectors: max -> min in red, min -> end in green (one segment per city)
    ax.plot([x_max[dipping], x_min[dipping]], [y_max[dipping], y_min[dipping]], ':', color="red", alpha=0.3)
    ax.plot([x_min[dipping], x_end[dipping]], [y_min[dipping], y_end[dipping]], ':', color="green", alpha=0.3)

    # Markers for the three landmark points
    ax.plot(x_max[dipping], y_max[dipping], '^r', label='max')
    ax.plot(x_min[dipping], y_min[dipping], 'vg', label='min')
    ax.plot(x_end[dipping], y_end[dipping], 'ok', label='end', markerfacecolor='none')

    in_right_column = i in (1, 3)
    in_top_row = i in (0, 1)

    ax.set_xlim([-1, 40])
    ax.set_ylim([0.4, 1]) #0.35
    if i == 0:
        ax.legend(loc='lower right')

    # Right column: no y label/ticks, but the POI type written vertically beside the panel
    ax.set_ylabel('' if in_right_column else 'Directness of LCC')
    if in_right_column:
        ax.set_yticklabels([])
        ax.text(42, 0.8, poi, rotation=90, horizontalalignment="center", verticalalignment='center')

    # Top row: no x label/ticks, the bottom row carries them
    ax.set_xlabel('' if in_top_row else measure + ' quantile')
    if in_top_row:
        ax.set_xticklabels([])

### Efficiency min/max/end

In [None]:
# For every (POI type, measure) combination, collect three landmark points of each city's
# "efficiency_global" curve: its minimum, its maximum and its final value.
#   ["y"][...] holds the metric values,
#   ["x"][...] holds the row index at which that value occurs (the last one if repeated).
# All lists are ordered like cities.keys().
efficiency_global = {}
for p in pois:
    efficiency_global[p] = {}
    for m in measures:
        series = [analysis_result[p][m][placeid]["efficiency_global"] for placeid in cities.keys()]
        y_points = {
            "min": [min(s) for s in series],
            "max": [max(s) for s in series],
            "end": [s[-1] for s in series],
        }
        # Index of the last row that equals the respective landmark value
        x_points = {
            key: [np.where(s == y)[0][-1] for s, y in zip(series, y_points[key])]
            for key in ("min", "max", "end")
        }
        efficiency_global[p][m] = {"x": x_points, "y": y_points}

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8), squeeze=True)
axes = axes.flatten()

# One panel per (POI type, measure) combination, in the order of `combs`
for i, ax in enumerate(axes):
    poi, measure = combs[i]
    xs = {key: np.asarray(vals) for key, vals in efficiency_global[poi][measure]["x"].items()}
    ys = {key: np.asarray(vals) for key, vals in efficiency_global[poi][measure]["y"].items()}

    # Keep only the cities whose minimum is reached after their maximum
    sel = np.where(xs["min"] > xs["max"])[0]
    print(f"{len(sel)}/{numcitiestotal} cities found with x_min>x_max for {combs[i]}")

    # Dotted connectors: max -> min in red, min -> end in green (one segment per city)
    ax.plot([xs["max"][sel], xs["min"][sel]], [ys["max"][sel], ys["min"][sel]], ':', color="red", alpha=0.3)
    ax.plot([xs["min"][sel], xs["end"][sel]], [ys["min"][sel], ys["end"][sel]], ':', color="green", alpha=0.3)

    # Markers for the three landmark points
    ax.plot(xs["max"][sel], ys["max"][sel], '^r', label='max')
    ax.plot(xs["min"][sel], ys["min"][sel], 'vg', label='min')
    ax.plot(xs["end"][sel], ys["end"][sel], 'ok', label='end', markerfacecolor='none')

    ax.set_xlabel(measure + ' quantile')
    ax.set_ylabel('Global Efficiency')
    ax.set_xlim([-1, 40])
    ax.set_ylim([0, 1])
    if i == 0:
        ax.legend(loc='lower center')
    if i in (1, 3):
        # Right column: no y label/ticks, but the POI type written vertically beside the panel
        ax.set_ylabel('')
        ax.set_yticklabels([])
        ax.text(42, 0.55, poi, rotation=90, horizontalalignment="center", verticalalignment='center')
    if i in (0, 1):
        # Top row: no x label/ticks, the bottom row carries them
        ax.set_xlabel('')
        ax.set_xticklabels([])

## Comparing grown with existing metrics

Here we plot, for some metrics, how much higher the metric of the grown network is compared to the existing network of the same length. This only works for cities whose existing network is short enough that its length is reached by the grown network at some point.

In [None]:
# Compare grown vs. existing networks at equal length.
# For every (POI type, measure) combination: for each city, take the grown network at the
# last stage that is not yet longer than the existing bike track network, compute the ratio
# grown/existing for each metric in comp_keys, and summarise the log10 ratios over all
# cities in one horizontal box plot, saved as PNG.
comp_keys = ["length_lcc", "efficiency_global_routed", "efficiency_local_routed", "coverage"]
comp_labels = ["Length of LCC", "Global Efficiency", "Local Efficiency", "Coverage"]
# Indices of the compared metrics. Defined before the loops because the debug reference
# line and the summary figure need it even if no city has been processed (yet).
x = list(range(len(comp_keys)))

for p in pois:
    for m in measures:
        numcities = 0
        values = []

        if debug: fig = plt.figure(figsize=(400/plotparam["dpi"], 400/plotparam["dpi"]), dpi=plotparam["dpi"])
        if debug: plt.semilogy([min(x), max(x)], [1,1], "k--")
        for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
            existing = analysis_existing[placeid][analysis_existing_rowkeys["biketrack"]]
            length_existing = existing["length"]
            if not length_existing:
                continue # no existing network length to compare against
            # Index of the first growth stage that is longer than the existing network.
            # argmax returns 0 both when no stage is longer and when already the first one
            # is, and in neither case is there a stage of comparable length.
            id_longer = np.argmax(analysis_result[p][m][placeid]["length"] > length_existing)
            if not id_longer:
                continue
            numcities += 1
            id_samelen = id_longer - 1 # last stage not yet longer than the existing network

            y = [[analysis_result[p][m][placeid][comp_keys[i]][id_samelen] / existing[comp_keys[i]]] for i in x]
            values.append(y)
            if debug: plt.semilogy(x, y, "o")

        print(str(numcities) + " cities found where L_grown=L for " + p + " | " + m)
        if numcities == 0:
            # Nothing to summarise; the indexing below would fail on an empty array
            continue
        values = np.log10(np.array(values))
        fig = plt.figure(figsize=(300/plotparam["dpi"], 300/plotparam["dpi"]), dpi=plotparam["dpi"])
        axes = fig.add_axes([0, 0, 1, 1])
        axes.plot([0,0], [min(x), max(x)+2],"k:") # reference line at ratio 1 (log10 = 0)
        # https://stackoverflow.com/questions/18500011/horizontal-box-plots-in-matplotlib-pandas#56088231
        axes.boxplot([values[np.isfinite(values[:,i]).flatten(),i].flatten().tolist() for i in x], vert=False, showfliers=False); #with isfinite we exclude zeros, infs, or nans which sometimes happen for e.g. local efficiency
        #axes.set_xlabel('$M_{grown}/M$ at $L_{grown}=L$')
        axes.set_xlabel('Grown vs. existing metric (at same length)')
        axes.set_yticklabels([comp_labels[i] for i in x]);
        axes.set_ylim([min(x)+0.5, max(x)+1.5])
        # fig.autofmt_xdate(rotation=45)

        # The x axis carries log10 ratios; label the major ticks with the ratios themselves
        axes.set_xlim([-0.5, 1.55])
        axes.set_xticks([-0.301,0,0.301, 0.699, 1, 1.301])
        axes.set_xticklabels([0.5, 1, 2,5, 10, 20])
        # Minor ticks at 0.1..0.9, 1..9 and 10..90. The first range starts at 0.1 rather
        # than 0 because log10(0) is -inf (divide-by-zero warning, unusable tick position).
        axes.xaxis.set_minor_locator(matplotlib.ticker.FixedLocator(np.log10(np.concatenate((np.linspace(0.1,1,9, endpoint=False),np.linspace(1, 10,9, endpoint=False),np.linspace(10, 100,9, endpoint=False))))))
        axes.text(1.5, 4.3, p + " | " + m, fontsize=10, horizontalalignment='right');
        fig.savefig(PATH["plots"] + 'grownvsexisting_poi_' + p + "_" + m + '.png', facecolor = "white", edgecolor = 'none', bbox_inches="tight")