# Step 8 - Exploratory analysis
## Project: Algorithmic bicycle network growth

This notebook is a sandbox for exploring results.

Contact: Michael Szell (michael.szell@gmail.com)  
Created: 2021-02-08  
Last modified: 2021-05-04

## Preliminaries

### Parameters

In [None]:
debug = False # If True, will produce plots and/or verbose output to double-check
%run -i "../parameters/parameters.py"
plotnotitle = True # If True, will not plot a text title for the plots on top left

### Setup

In [None]:
%run -i path.py
%run -i setup.py
plt.style.use(PATH["parameters"] + 'plotstyle.mplstyle')

%load_ext watermark
%watermark -n -v -m -g -iv

### Functions

In [None]:
%run -i functions.py

### Constants

In [None]:
# POI sources and growth measures whose results are explored in this notebook.
pois = ["grid", "railwaystation"]
measures = ["betweenness", "closeness"]#, "random"]
# All (poi, measure) pairs in POI-major order; the 2x2 figures below use one pair per panel.
combs = [(poi_source, measure) for poi_source in pois for measure in measures]
print(combs)

In [None]:
# Number of cities in the `cities` dict; printed as the denominator of the "x/y cities found" reports.
numcitiestotal = len(cities)
numcitiestotal

### Load all results

In [None]:
# Load the analysis results of every city for every (poi, measure) combination.
# Resulting layout: analysis_result[poi][measure][placeid] -> structured array read from CSV.
analysis_result = {}
for p in pois:
    analysis_result[p] = {}
    for m in measures:
        analysis_result[p][m] = {}

        for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
            filename = placeid + '_poi_' + p + "_" + m + ".csv"
            analysis_result[p][m][placeid] = np.genfromtxt(PATH["results"] + placeid + "/" + filename, delimiter=',', names=True)
            if len(analysis_result[p][m][placeid]) == 0:
                # No results for this city: use a placeholder with the same fields and
                # length as the first city's results, filled with -1.
                # The .copy() is essential: without it the placeholder IS the first
                # city's array, and writing -1 into it would wipe that city's real results.
                placeholder = analysis_result[p][m][list(cities.keys())[0]].copy()
                for n in placeholder.dtype.names:
                    placeholder[n] = -1
                analysis_result[p][m][placeid] = placeholder

In [None]:
# Per-city metrics of the existing networks: analysis_existing[placeid] -> structured array.
analysis_existing = {}
existing_usecols = tuple(range(1, 12))  # read columns 1..11, i.e. everything except column 0
for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
    filename = placeid + '_existing.csv'
    existing_path = PATH["results"] + placeid + "/" + filename
    analysis_existing[placeid] = np.genfromtxt(existing_path, delimiter=',', names=True, usecols=existing_usecols)

## Directness and Efficiency for all cities

In this section, we want to check the hypothesis that there is a "dip" in the metrics of directness and efficiency, i.e. that the curve is U-shaped: the metric starts at a high value, then falls (due to percolation / emergence of the giant component), and then grows back to an intermediate value. We do that for each combination of POI source and growth measure: [('grid', 'betweenness'), ('grid', 'closeness'), ('railwaystation', 'betweenness'), ('railwaystation', 'closeness')]

To do that, first we select all cities where the minimum values of the metric are to the right of the maximum value. For those cities we then only plot the (x,y) pairs of the min, max, and end values.

### Directness min/max/end

In [None]:
# For every (poi, measure) pair collect, per city (in the order of cities.keys()):
#   "y": the minimum, maximum and final value of the "directness_lcc" curve
#   "x": the index in the curve at which each of these values occurs
directness_lcc = {}
for p in pois:
    directness_lcc[p] = {}
    for m in measures:
        curves = [analysis_result[p][m][placeid]["directness_lcc"] for placeid in cities.keys()]
        lows = [min(curve) for curve in curves]
        highs = [max(curve) for curve in curves]
        finals = [curve[-1] for curve in curves]

        directness_lcc[p][m] = {
            # np.where(...)[0][-1]: if the value occurs more than once, use its last index.
            "x": {
                "min": [np.where(curve == low)[0][-1] for curve, low in zip(curves, lows)],
                "max": [np.where(curve == high)[0][-1] for curve, high in zip(curves, highs)],
                "end": [np.where(curve == final)[0][-1] for curve, final in zip(curves, finals)],
            },
            "y": {"min": lows, "max": highs, "end": finals},
        }

In [None]:
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (10, 8), squeeze = True)
axes = axes.flatten()

# One panel per (poi, measure) combination
for i, ax in enumerate(axes):
    poi_name, measure_name = combs[i]
    stats = directness_lcc[poi_name][measure_name]
    x_min, x_max, x_end = (np.asarray(stats["x"][which]) for which in ("min", "max", "end"))
    y_min, y_max, y_end = (np.asarray(stats["y"][which]) for which in ("min", "max", "end"))

    # Keep only the cities whose minimum lies to the right of their maximum
    ind = np.where(x_min > x_max)[0]
    print(f"{len(ind)}/{numcitiestotal} cities found with x_min>x_max for {combs[i]}")

    # Dotted connectors per city: max -> min (red), then min -> end (green)
    ax.plot([x_max[ind], x_min[ind]], [y_max[ind], y_min[ind]], ':', color="red", alpha=0.3);
    ax.plot([x_min[ind], x_end[ind]], [y_min[ind], y_end[ind]], ':', color="green", alpha=0.3);

    # Markers for the three points of every selected city
    ax.plot(x_max[ind], y_max[ind], '^r', label='max');
    ax.plot(x_min[ind], y_min[ind], 'vg', label='min');
    ax.plot(x_end[ind], y_end[ind], 'ok', label='end', markerfacecolor='none');

    ax.set_xlabel(measure_name + ' quantile')
    ax.set_ylabel('Directness of LCC')
    ax.set_xlim([-1,40])
    ax.set_ylim([0.4,1]) #0.35
    if i == 0:
        ax.legend(loc='lower right');
    if i in (1, 3):
        # Right column: no y labels, POI source written vertically to the right of the panel
        ax.set_ylabel('')
        ax.set_yticklabels([])
        ax.text(42, 0.8, poi_name, rotation = 90, horizontalalignment = "center", verticalalignment='center')
    if i in (0, 1):
        # Top row: no x labels
        ax.set_xlabel('')
        ax.set_xticklabels([])

### Efficiency min/max/end

In [None]:
# For every (poi, measure) pair collect, per city (in the order of cities.keys()):
#   "y": the minimum, maximum and final value of the "efficiency_global" curve
#   "x": the index in the curve at which each of these values occurs
efficiency_global = {}
for p in pois:
    efficiency_global[p] = {}
    for m in measures:
        curves = [analysis_result[p][m][placeid]["efficiency_global"] for placeid in cities.keys()]
        lows = [min(curve) for curve in curves]
        highs = [max(curve) for curve in curves]
        finals = [curve[-1] for curve in curves]

        efficiency_global[p][m] = {
            # np.where(...)[0][-1]: if the value occurs more than once, use its last index.
            "x": {
                "min": [np.where(curve == low)[0][-1] for curve, low in zip(curves, lows)],
                "max": [np.where(curve == high)[0][-1] for curve, high in zip(curves, highs)],
                "end": [np.where(curve == final)[0][-1] for curve, final in zip(curves, finals)],
            },
            "y": {"min": lows, "max": highs, "end": finals},
        }

In [None]:
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (10, 8), squeeze = True)
axes = axes.flatten()

# One panel per (poi, measure) combination
for i, ax in enumerate(axes):
    poi_name, measure_name = combs[i]
    stats = efficiency_global[poi_name][measure_name]
    x_min, x_max, x_end = (np.asarray(stats["x"][which]) for which in ("min", "max", "end"))
    y_min, y_max, y_end = (np.asarray(stats["y"][which]) for which in ("min", "max", "end"))

    # Keep only the cities whose minimum lies to the right of their maximum
    ind = np.where(x_min > x_max)[0]
    print(f"{len(ind)}/{numcitiestotal} cities found with x_min>x_max for {combs[i]}")

    # Dotted connectors per city: max -> min (red), then min -> end (green)
    ax.plot([x_max[ind], x_min[ind]], [y_max[ind], y_min[ind]], ':', color="red", alpha=0.3);
    ax.plot([x_min[ind], x_end[ind]], [y_min[ind], y_end[ind]], ':', color="green", alpha=0.3);

    # Markers for the three points of every selected city
    ax.plot(x_max[ind], y_max[ind], '^r', label='max');
    ax.plot(x_min[ind], y_min[ind], 'vg', label='min');
    ax.plot(x_end[ind], y_end[ind], 'ok', label='end', markerfacecolor='none');

    ax.set_xlabel(measure_name + ' quantile')
    ax.set_ylabel('Global Efficiency')
    ax.set_xlim([-1,40])
    ax.set_ylim([0,1])
    if i == 0:
        ax.legend(loc='lower center');
    if i in (1, 3):
        # Right column: no y labels, POI source written vertically to the right of the panel
        ax.set_ylabel('')
        ax.set_yticklabels([])
        ax.text(42, 0.55, poi_name, rotation = 90, horizontalalignment = "center", verticalalignment='center')
    if i in (0, 1):
        # Top row: no x labels
        ax.set_xlabel('')
        ax.set_xticklabels([])

### Plotting full curves

In [None]:
# Scratch check: `ind` is left over from the plotting cell above (a bare expression that is
# not the last line of the cell is not displayed); the cell output is the 6th place id.
ind
list(cities)[5]

In [None]:
fig, axes = plt.subplots(nrows = 1, ncols = 4, figsize=(1200/plotparam["dpi"], 300/plotparam["dpi"]), dpi=plotparam["dpi"], squeeze = True)
axes = axes.flatten()
placeids = list(cities.keys())

# One panel per (poi, measure) combination, showing the full routed global
# efficiency curve of every city whose efficiency minimum lies right of its maximum.
for i, comb in enumerate(combs):
    p, m = comb
    extrema_x = efficiency_global[p][m]["x"]
    ind = np.where(np.asarray(extrema_x["min"]) > np.asarray(extrema_x["max"]))[0]
    print(f"{len(ind)}/{numcitiestotal} cities found with x_min>x_max for {comb}")
    print([placeids[city_index] for city_index in ind])

    ax = axes[i]
    for city_index in ind:
        placeid = placeids[city_index]
        tmp, = ax.plot(prune_quantiles, analysis_result[p][m][placeid]["efficiency_global_routed"])
        tmp.set_label('_hidden')  # leading underscore keeps the line out of any legend
    ax.set_title(comb)

Next: Plot single cities with the biggest dips, only for (grid, betweenness) and (railwaystation, betweenness).

In [None]:
# The two betweenness combinations used in the next cell
[combs[k] for k in (0, 2)]

In [None]:
# One row of small panels per betweenness combination: row 0 = combs[0], row 1 = combs[2].
ncols = 15
fig, axes = plt.subplots(nrows = 2, ncols = ncols, figsize=(1800/plotparam["dpi"], 300/plotparam["dpi"]), dpi=plotparam["dpi"], squeeze = True)
axes = axes.flatten()
placeids = list(cities.keys())

for i, comb in enumerate([combs[0],combs[2]]):
    p = comb[0]
    m = comb[1]
    # Cities whose global efficiency minimum lies to the right of its maximum
    ind = np.where(np.asarray(efficiency_global[p][m]["x"]["min"]) > np.asarray(efficiency_global[p][m]["x"]["max"]))[0]
    # Report the combination actually analysed (`comb`); indexing combs with the
    # loop counter would label the second row as combs[1] instead of combs[2].
    print(str(len(ind)) + "/" + str(numcitiestotal) + " cities found with x_min>x_max for "+ str(comb))
    print([placeids[city_index] for city_index in ind])

    if len(ind) > ncols:
        print("Only the first " + str(ncols) + " of " + str(len(ind)) + " cities fit into the row and are plotted")

    for j, city_index in enumerate(ind[:ncols]):
        # Row i starts at flat index i*ncols. An offset based on len(ind) makes the
        # rows overlap whenever the two combinations select a different number of cities.
        ax = axes[j + i*ncols]
        placeid = placeids[city_index]
        tmp, = ax.plot(prune_quantiles, analysis_result[p][m][placeid]["efficiency_global_routed"])
        tmp.set_label('_hidden')
        ax.set_title(placeid)
        ax.set_ylim([0,1])

Now plot only the biggest dips

In [None]:
# Display the ids of all loaded cities.
cities.keys()

In [None]:
# Cities selected for the "dips in global efficiency" figure (2x3 panels)
bigdips = ["delft", "copenhagen", "boston", "paris", "barcelona", "sheffield"]

fig, axes = plt.subplots(nrows = 2, ncols = 3, figsize=(300/plotparam["dpi"], 220/plotparam["dpi"]), dpi=plotparam["dpi"], squeeze = True)
axes = axes.flatten()

i = 0
comb = combs[0]
p, m = comb[0], comb[1]

xtick_values = [0, 0.5, 1]
ytick_values = [0,0.25,0.5,0.75,1]

for j, placeid in enumerate(bigdips):
    ax = axes[j]
    in_top_row = j <= 2
    in_left_column = j % 3 == 0

    tmp, = ax.plot(prune_quantiles, analysis_result[p][m][placeid]["efficiency_global_routed"], **plotparam_analysis["bikegrown_" + m])
    ax.text(0.96, 0.08, cities[placeid]["name"], fontsize=8, horizontalalignment='right')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    # x axis: tick labels only in the bottom row, axis label only under the bottom-middle panel
    ax.set_xticks(xtick_values)
    if j == 4:
        ax.set_xlabel('Betweenness quantile')
    if in_top_row:
        ax.set_xlabel('')
        ax.set_xticklabels([])
    else:
        ax.set_xticklabels(xtick_values)

    # y axis: tick labels only in the left column
    ax.set_yticks(ytick_values)
    if in_left_column:
        ax.set_yticklabels(ytick_values)
    else:
        ax.set_ylabel('')
        ax.set_yticklabels([])

    # Figure title sits on the top-middle panel
    if j == 1:
        ax.set_title('Dips in global efficiency')

plt.subplots_adjust(top = 0.90, bottom = 0.16, left = 0.12, right = 0.97, wspace = 0.22, hspace = 0.22)
fig.savefig(PATH["plots"] + "/" + 'dipsglobalefficiency_poi_' + p + '.eps', facecolor = "white", edgecolor = 'none')

Now the figure for the paper: directness and global efficiency, for Boston (big panels, left) and 4 other cities (small panels, right)

In [None]:
# Look up which font file matplotlib resolves for the generic 'sans-serif' family
# and display its path.
from matplotlib.font_manager import findfont, FontProperties
font = findfont(FontProperties(family=['sans-serif']))
font

In [None]:
# Paper figure on a 4x4 grid:
#   - left half: two large Boston panels (directness on top, global efficiency below),
#     each spanning a 2x2 area of the grid
#   - right half: 8 small panels; rows 1-2 show directness, rows 3-4 show global efficiency
smallmultiples = ["montreal", "mumbai", "paris", "tokyo"]
checkpoints = [0.025, 0.1, 0.2] # x positions of the vertical dotted lines drawn in the large panels
xticks = [0, 0.1, 0.2,0.3,0.4]
yticks = [0,0.2,0.4,0.6,0.8,1]

fig, axes = plt.subplots(nrows = 4, ncols = 4, figsize=(300/plotparam["dpi"], 300/plotparam["dpi"]), dpi=plotparam["dpi"], squeeze = True)

i = 0 # running 1-based subplot index, incremented once per (row, col) cell
comb = combs[0]
p = comb[0]
m = comb[1]
smallmultipleindex = 0 # position in smallmultiples; wraps around after the last city

for row in [1,2,3,4]:
    for col in [1,2,3,4]:
        i += 1
        placeid = "boston"
        if col >= 3: # Right half: small multiple
            ax = plt.subplot(4,4,i)
            # Two small panels per row, so the 4 cities repeat every two rows:
            # rows 1-2 and rows 3-4 each show all of smallmultiples once
            placeid = smallmultiples[smallmultipleindex]
            smallmultipleindex += 1
            smallmultipleindex = smallmultipleindex % len(smallmultiples)
            ax.text(0.96*xticks[-1], 0.08, cities[placeid]["name"], fontsize=8, horizontalalignment='right')
            if row <= 2:
                ax.plot(prune_quantiles, analysis_result[p][m][placeid]["directness_lcc"], **plotparam_analysis["bikegrown_" + m])
            else:
                ax.plot(prune_quantiles, analysis_result[p][m][placeid]["efficiency_global_routed"], **plotparam_analysis["bikegrown_" + m])
            ax.set_yticklabels([])
            ax.set_yticks(yticks)
            ax.set_xticks(xticks)
            # NOTE(review): both branches below are identical, so no small panel gets x tick
            # labels; presumably the bottom row was once meant to show them - confirm
            if row == 4:
                ax.set_xticklabels('')
            else:
                ax.set_xticklabels('')
            
        elif (row == 1 and col == 1): # Top left
            # (1,6) spans grid cells 1 through 6, i.e. rows 1-2 x columns 1-2
            ax = plt.subplot(4,4,(1,6))
            ax.plot(prune_quantiles, analysis_result[p][m][placeid]["directness_lcc"], **plotparam_analysis["bikegrown_" + m])
            ax.text(0.98*xticks[-1], 0.04, cities[placeid]["name"], fontsize=8, horizontalalignment='right')
            ax.set_yticks(yticks)
            ax.set_yticklabels(yticks)
            ax.set_ylabel('Directness')
            ax.set_xticks(xticks)
            ax.set_xticklabels([])
            for c in checkpoints:
                ax.plot([c, c], [0, 1], ":k", linewidth = 1)
            
        elif (row == 3 and col == 1): # Bottom left
            # (9,14) spans grid cells 9 through 14, i.e. rows 3-4 x columns 1-2
            ax = plt.subplot(4,4,(9,14))
            ax.text(0.98*xticks[-1], 0.04, cities[placeid]["name"], fontsize=8, horizontalalignment='right')
            ax.plot(prune_quantiles, analysis_result[p][m][placeid]["efficiency_global_routed"], **plotparam_analysis["bikegrown_" + m])
            ax.set_yticks(yticks)
            ax.set_yticklabels(yticks)
            ax.set_ylabel('Global efficiency')
            ax.set_xlabel('Betweenness quantile B')
            ax.set_xticks(xticks)
            ax.set_xticklabels(xticks)
            for c in checkpoints:
                ax.plot([c, c], [0, 1], ":k", linewidth = 1)
            
        # For grid cells that match no branch above (e.g. row 1, col 2), `ax` still refers
        # to the axes of the previous iteration, so the limits are simply applied again
        ax.set_xlim([0, 0.4])
        ax.set_ylim(0, 1)
        
plt.subplots_adjust(top = 0.98, bottom = 0.13, left = 0.14, right = 0.97, wspace = 0.22, hspace = 0.22)
            
            

fig.savefig(PATH["plots"] + "/" + 'smallmultiples_poi_' + p + '.eps', facecolor = "white", edgecolor = 'none')

## Comparing grown with existing metrics

Here we plot for some metrics how much higher the metric of the grown network is compared to the existing network of same length. This only works for cities that have a small enough existing length which is at some point reached by the grown network.

In [None]:
# Metrics that are compared between the grown and the existing network, with their plot labels
comp_keys = ["length_lcc", "efficiency_global_routed", "efficiency_local_routed", "coverage"]
comp_labels = ["Length of LCC", "Global Efficiency", "Local Efficiency", "Coverage"]
# Positions of the compared metrics. Defined up front so that the name exists for the
# debug plot and for parameter sets in which no city qualifies.
x = list(range(len(comp_keys)))

cities_here = [] # all qualifying cities, accumulated over all parameter sets
# TODO: Also look at random case!

for p in pois:
    for m in measures:
        numcities = 0
        values = []

        if debug: fig = plt.figure(figsize=(400/plotparam["dpi"], 400/plotparam["dpi"]), dpi=plotparam["dpi"])
        if debug: plt.semilogy([min(x), max(x)], [1,1], "k--")
        for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):
            existing = analysis_existing[placeid][analysis_existing_rowkeys["biketrack"]]
            length_existing = existing["length"]
            # np.argmax gives the first stage at which the grown network is longer than the
            # existing one. It is 0 both if that never happens and if it already happens at
            # the first stage; in either case there is no earlier stage to compare with.
            if length_existing and np.argmax(analysis_result[p][m][placeid]["length"] > length_existing):
                numcities += 1
                # Last growth stage that is not yet longer than the existing network
                id_samelen = np.argmax(analysis_result[p][m][placeid]["length"] > length_existing) - 1

                # Ratio grown/existing for every compared metric (each wrapped in a 1-element list)
                y = [[analysis_result[p][m][placeid][key][id_samelen] / existing[key]] for key in comp_keys]
                values.append(y)
                cities_here.append(placeid)
                if debug: plt.semilogy(x, y, "o")

        print(str(numcities) + " cities found where L_grown=L for " + p + " | " + m)
        # Print only the cities of this parameter set, so that the list matches the count above
        print(cities_here[len(cities_here) - numcities:])
        if numcities == 0:
            # Nothing to compare: a boxplot cannot be built from an empty array
            continue
        values = np.log10(np.array(values))
        fig = plt.figure(figsize=(200/plotparam["dpi"], 100/plotparam["dpi"]), dpi=plotparam["dpi"])
        axes = fig.add_axes([0, 0, 1, 1])
        axes.plot([0,0], [min(x), max(x)+2],"k:", linewidth=0.5) # reference line at ratio 1 (log10 = 0)
        # https://stackoverflow.com/questions/18500011/horizontal-box-plots-in-matplotlib-pandas#56088231
        axes.boxplot([values[np.isfinite(values[:,i]).flatten(),i].flatten().tolist() for i in x], vert=False, showfliers=False); #with isfinite we exclude zeros, infs, or nans which sometimes happen for e.g. local efficiency
        axes.set_xlabel('$M_{syn}/M_{real}$ | $L_{syn}=L_{real}$')
        axes.set_title('Synthetic versus real networks')
        axes.set_yticklabels([comp_labels[i] for i in x]);
        axes.set_ylim([min(x)+0.5, max(x)+1.5])

        # The x axis is in log10 units; ticks are placed at log10 of the displayed ratios
        axes.set_xlim([-0.35, 1.55])
        axes.set_xticks([-0.301,0,0.301, 0.699, 1, 1.301])
        axes.set_xticklabels([0.5, 1, 2,5, 10, 20])
        axes.xaxis.set_minor_locator(matplotlib.ticker.FixedLocator(np.log10(np.concatenate((np.linspace(0,1,10, endpoint=False),np.linspace(1, 10,9, endpoint=False),np.linspace(10, 100,9, endpoint=False))))))
        # Path built with "/" like the other figures saved in this notebook
        fig.savefig(PATH["plots"] + "/" + 'grownvsexisting_poi_' + p + "_" + m + '.eps', facecolor = "white", edgecolor = 'none', bbox_inches="tight")

## Average analysis plot

Here we plot one analysis figure as an average over all cities.

### Each parameter set as a single figure

In [None]:
# %%capture

# For every (POI source, prune measure) parameter set: load the results of all cities,
# average every metric over the cities, and save one 2x5 analysis figure.
# NOTE(review): this cell rebinds `analysis_result` to a flat {metric: averaged array}
# dict, replacing the nested per-city dict built in "Load all results". Cells above that
# index analysis_result[p][m][placeid] need that loading cell to be re-run afterwards.

# Run all parameter sets
poi_source_list = ["grid", "railwaystation"]
prune_measure_list = ["betweenness", "closeness", "random"]
parsets_used = list(itertools.product(poi_source_list, prune_measure_list))


for poi_source, prune_measure in parsets_used:
    print(poi_source, prune_measure)
        
    analysis_result_city = {}
    numcities = 0
    for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):

        # PLOT Analysis
        filename = placeid + '_poi_' + poi_source + "_" + prune_measure + ".csv"
        analysis_result_city_temp = np.genfromtxt(PATH["results"] + placeid + "/" + filename, delimiter=',', names=True)
        if len(analysis_result_city_temp) == 0: # Discard if no results (for example no railwaystations)
            print(placeid + ": No analysis results available")
            continue
        else:
            numcities += 1
            analysis_result_city[placeid] = analysis_result_city_temp
            # Field names of the structured array; only assigned if at least one city has results
            metric_keys = analysis_result_city[placeid].dtype.names
    
    # All cities are loaded, now create the average values
    analysis_result = {}
    for metric in metric_keys:
        # NOTE(review): 40 rows are hard-coded - presumably len(prune_quantiles); confirm
        temp = np.zeros([40, numcities])
        for i, placeid_this in enumerate(analysis_result_city.keys()):
            temp[:, i] = analysis_result_city[placeid_this][metric]
        analysis_result[metric] = np.mean(temp, axis = 1) # average over cities, per row
                        
                    
    # Plot
    nc = 5
    fig, axes = plt.subplots(nrows = 2, ncols = nc, figsize = (16, 6))
    # Metric key -> panel title; the first nc entries form the top row, the rest the bottom row
    keys_metrics = {"length": "Length [km]","coverage": "Coverage [km$^2$]","overlap_biketrack": "Overlap Protected","directness_lcc": "Directness of LCC","efficiency_global": "Global Efficiency",
            "length_lcc": "Length of LCC [km]","poi_coverage": "POIs covered","overlap_bikeable": "Overlap Bikeable","components": "Components","efficiency_local": "Local Efficiency"}

    # Top row
    for i, ax in enumerate(axes[0]):
        key = list(keys_metrics.keys())[i]
        if key in ["overlap_biketrack", "overlap_bikeable"]:
            # Overlap is plotted as a fraction of the total length
            ax.plot(prune_quantiles, analysis_result[key] / analysis_result["length"], **plotparam_analysis["bikegrown"])
        elif key in ["efficiency_global", "efficiency_local"]:
            # Two curves: the plain metric and its "_routed" variant
            ax.plot(prune_quantiles, analysis_result[key], **plotparam_analysis["bikegrown_abstract"])
            tmp, = ax.plot(prune_quantiles, analysis_result[key+"_routed"], **plotparam_analysis["bikegrown"])
            tmp.set_label('_hidden') # leading underscore keeps this line out of the legend
        elif key in ["length", "length_lcc"]: # Convert m->km
            ax.plot(prune_quantiles, analysis_result[key]/1000, **plotparam_analysis["bikegrown"])
        else:
            ax.plot(prune_quantiles, analysis_result[key], **plotparam_analysis["bikegrown"])

        if i == 0:
            # Remember the upper y limit of the first panel; it is reused for the panel below it
            ymax0 = ax.get_ylim()[1]
            ax.set_ylim(0, ymax0)
            ax.text(-0.15, ymax0*1.25, "Average city" + " (" + poi_source + " | " + prune_measure + ")", fontsize=16, horizontalalignment='left')
            ax.legend(loc = 'upper left')
        if i == 4:
            ax.legend(loc = 'best')


        # set_analysissubplot is defined outside this notebook; presumably it adjusts the
        # axes bound to the global name `ax` - verify in functions.py
        set_analysissubplot(key)
        ax.set_title(list(keys_metrics.values())[i])
        ax.set_xlabel('')
        ax.set_xticklabels([])


    # Bottom row
    for i, ax in enumerate(axes[1]):
        key = list(keys_metrics.keys())[i+nc]
        if key in ["overlap_biketrack", "overlap_bikeable"]:
            ax.plot(prune_quantiles, analysis_result[key] / analysis_result["length"], **plotparam_analysis["bikegrown"])
        elif key in ["efficiency_global", "efficiency_local"]:
            ax.plot(prune_quantiles, analysis_result[key], **plotparam_analysis["bikegrown_abstract"])
            ax.plot(prune_quantiles, analysis_result[key+"_routed"], **plotparam_analysis["bikegrown"])
        elif key in ["length", "length_lcc"]: # Convert m->km
            ax.plot(prune_quantiles, analysis_result[key]/1000, **plotparam_analysis["bikegrown"])
        else:
            ax.plot(prune_quantiles, analysis_result[key], **plotparam_analysis["bikegrown"])

        if i == 0:
            # Same y range as the first panel of the top row
            ax.set_ylim(0, ymax0)
        set_analysissubplot(key)
        ax.set_title(list(keys_metrics.values())[i+nc])
        ax.set_xlabel(prune_measure + ' quantile')
        if key in ["poi_coverage"]:
            # https://stackoverflow.com/questions/30914462/matplotlib-how-to-force-integer-tick-labels
            ax.yaxis.set_major_locator(MaxNLocator(integer=True)) 

    plt.subplots_adjust(top = 0.87, bottom = 0.09, left = 0.05, right = 0.97, wspace = 0.25, hspace = 0.25)
    fig.savefig(PATH["plots"] + "/" + 'averagecity_analysis_poi_' + poi_source + "_" + prune_measure + '_noconstr.png', facecolor = "white", edgecolor = 'none')
    plt.close()

### Figures with bundled parameter sets

In [None]:
def set_analysissubplot_special(key, p):
    """Apply hand-tuned y limits for metric `key` to the axes bound to the global name `ax`.

    key: name of the plotted metric, e.g. "components" or "directness_lcc".
    p: POI source; only "grid" and "railwaystation" are handled, anything else is a no-op.

    Both POI sources share the same upper limits for four metrics; "grid" additionally
    restricts the y range of "directness_lcc".
    """
    shared_upper_limits = {
        "components": 25,
        "efficiency_local": 0.25,
        "overlap_bikeable": 0.25,
        "overlap_biketrack": 0.06,
    }

    if p != "grid" and p != "railwaystation":
        return

    if key in shared_upper_limits:
        ax.set_ylim(top = shared_upper_limits[key])

    if p == "grid" and key == "directness_lcc":
        ax.set_ylim(bottom = 0.47)
        ax.set_ylim(top = 0.81)

In [None]:
# %%capture
# For one POI source: average every metric over all cities separately for each prune
# measure, then draw all measures together in one 2x4 figure.
# NOTE(review): this cell rebinds `analysis_result` to {measure: {metric: averaged array}},
# replacing whatever an earlier cell stored under that name.
plt.style.use(PATH["parameters"] + 'plotstyle.mplstyle')
# Run all parameter sets
prune_measure_list = ["betweenness", "closeness", "random"]
poi_source = "grid" # railwaystation grid

analysis_result_city = {}
analysis_result = {}
for m in prune_measure_list:
    analysis_result_city[m] = {}
    numcities = 0
    for placeid, placeinfo in tqdm(cities.items(), desc="Cities"):

        # PLOT Analysis
        filename = placeid + '_poi_' + poi_source + "_" + m + ".csv"
        analysis_result_city_temp = np.genfromtxt(PATH["results"] + placeid + "/" + filename, delimiter=',', names=True)
        if len(analysis_result_city_temp) == 0: # Discard if no results (for example no railwaystations)
            print(placeid + ": No analysis results available")
            continue
        else:
            numcities += 1
            analysis_result_city[m][placeid] = analysis_result_city_temp
            # Field names of the structured array; only assigned if at least one city has results
            metric_keys = analysis_result_city[m][placeid].dtype.names

    # All cities are loaded, now create the average values
    analysis_result[m] = {}
    for metric in metric_keys:
        # NOTE(review): 40 rows are hard-coded - presumably len(prune_quantiles); confirm
        temp = np.zeros([40, numcities])
        for i, placeid_this in enumerate(analysis_result_city[m].keys()):
            temp[:, i] = analysis_result_city[m][placeid_this][metric]
        analysis_result[m][metric] = np.mean(temp, axis = 1) # average over cities, per row


# Plot
nc = 4
fig, axes = plt.subplots(nrows = 2, ncols = nc, figsize=(640/plotparam["dpi"], 320/plotparam["dpi"]), dpi=plotparam["dpi"])
# Metric key -> panel title; the first nc entries form the top row, the rest the bottom row
keys_metrics = {"length": "Length [km]","coverage": "Coverage [km$^2$]", "directness_lcc": "Directness of LCC","efficiency_global": "Global Efficiency",
        "length_lcc": "Length of LCC [km]","poi_coverage": "POIs covered","components": "Components","efficiency_local": "Local Efficiency"}

# Top row
for i, ax in enumerate(axes[0]):
    key = list(keys_metrics.keys())[i]
    for m in prune_measure_list:
        # The overlap keys are not in keys_metrics above, so this first branch is never taken here
        if key in ["overlap_biketrack", "overlap_bikeable"]:
            ax.plot(prune_quantiles, analysis_result[m][key] / analysis_result[m]["length"], **plotparam_analysis["bikegrown_" + m])
        elif key in ["efficiency_global", "efficiency_local"]:
            # Only the "_routed" variant of the efficiency is drawn
            tmp, = ax.plot(prune_quantiles, analysis_result[m][key+"_routed"], **plotparam_analysis["bikegrown_" + m])
            tmp.set_label('_hidden') # leading underscore keeps this line out of the legend
        elif key in ["length", "length_lcc"]: # Convert m->km
            ax.plot(prune_quantiles, analysis_result[m][key]/1000, **plotparam_analysis["bikegrown_" + m])
        else:
            ax.plot(prune_quantiles, analysis_result[m][key], **plotparam_analysis["bikegrown_" + m])
            
        # This sits inside the loop over measures, so the same arrow is drawn once per measure
        if key in ["directness_lcc"]:
            ax.arrow(0.325, 0.57, 0, -0.04, color = "orange", width = 0.01, head_width = 0.05, head_length = 0.02)

        if i == 0:
            # Remember the upper y limit of the first panel; it is reused for the panel below it
            ymax0 = ax.get_ylim()[1]
            ax.set_ylim(0, ymax0)
            if not plotnotitle:
                ax.text(-0.15, ymax0*1.25, "Average city" + " (" + poi_source + ")", fontsize=16, horizontalalignment='left')
        


    # set_analysissubplot is defined outside this notebook; presumably it adjusts the
    # axes bound to the global name `ax` - verify in functions.py
    set_analysissubplot(key)
    set_analysissubplot_special(key, poi_source)
    ax.set_title(list(keys_metrics.values())[i])
    ax.set_xlabel('')
    ax.set_xticklabels([])


# Bottom row
for i, ax in enumerate(axes[1]):
    key = list(keys_metrics.keys())[i+nc]
    for m in prune_measure_list:
        if key in ["overlap_biketrack", "overlap_bikeable"]:
            ax.plot(prune_quantiles, analysis_result[m][key] / analysis_result[m]["length"], **plotparam_analysis["bikegrown_" + m])
        elif key in ["efficiency_global", "efficiency_local"]:
            ax.plot(prune_quantiles, analysis_result[m][key+"_routed"], **plotparam_analysis["bikegrown_" + m])
        elif key in ["length", "length_lcc"]: # Convert m->km
            ax.plot(prune_quantiles, analysis_result[m][key]/1000, **plotparam_analysis["bikegrown_" + m])
        else:
            ax.plot(prune_quantiles, analysis_result[m][key], **plotparam_analysis["bikegrown_" + m])

        # Unlike in the top row, the axis styling below is inside the loop over measures
        # and therefore runs once per measure
        if i == 0:
            ax.set_ylim(0, ymax0)
        set_analysissubplot(key)
        set_analysissubplot_special(key, poi_source)
        ax.set_title(list(keys_metrics.values())[i+nc])
        ax.set_xlabel('Growth strategy quantile')
        if key in ["poi_coverage"]:
            # https://stackoverflow.com/questions/30914462/matplotlib-how-to-force-integer-tick-labels
            ax.set_yticks([0,40,80,120,160])
        if i == 3:
            leg = ax.legend(loc = 'best')
            leg.get_frame().set_linewidth(0.5)

# Leave more room at the top when the text title is drawn
if plotnotitle:
    plt.subplots_adjust(top = 0.92, bottom = 0.14, left = 0.05, right = 0.97, wspace = 0.28, hspace = 0.28)
else:
    plt.subplots_adjust(top = 0.82, bottom = 0.14, left = 0.05, right = 0.97, wspace = 0.28, hspace = 0.28)
fig.savefig(PATH["plots"] + "/" + 'averagecity_analysis_poi_' + poi_source + '_noconstr.eps', facecolor = "white", edgecolor = 'none')
plt.close()

## Basic stats on all cities

### World map

In [None]:
# World map with one dot per city, plus a zoomed-in inset of Europe in the lower left corner.
# NOTE(review): gpd.datasets is deprecated in recent geopandas releases - confirm the pinned version.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world = world[(world.name!="Antarctica")] # exclude Antarctica

# Read the cities' latitude and longitude.
# NOTE(review): this rebinds `cities` from the dict of place ids used in the cells above to a
# DataFrame; re-running earlier cells afterwards requires re-running the setup cells first.
cities = pd.read_csv(PATH['parameters'] + 'cities_all_lat_lng.csv')

cities_x = np.array(cities['lng'].tolist())
cities_y = np.array(cities['lat'].tolist())


fig, ax = plt.subplots(figsize=(640/plotparam["dpi"], 320/plotparam["dpi"]), dpi=plotparam["dpi"], squeeze = True)
ax.set_facecolor('aliceblue') # blue background represents the oceans

world.plot(ax=ax, color='lightgrey', edgecolor='whitesmoke', linewidth=0.5)
ax.plot(cities_x, cities_y, '.', color='black', markersize=6)
ax.set_ylim([-65, 90])
ax.set_xlim([-180, 180])
ax.set_xticks([])
ax.set_yticks([])


# Europe inset
axins = ax.inset_axes([0.01, 0.01, 0.251, 0.48]) # location and size of the inset [x_0, y_0, width, height] in axes coordinates

world.plot(color='lightgrey', edgecolor='whitesmoke', ax=axins, linewidth=0.6)
axins.plot(cities_x, cities_y, '.', color='black', markersize=7)
axins.set_facecolor('aliceblue')
# Sub-region of the map shown in the inset
x1, x2, y1, y2 = -5, 22, 40.5, 61 # lon/lat window to zoom in on
axins.set_xlim(x1, x2)
axins.set_ylim(y1, y2)
axins.set_xticks([])
axins.set_yticks([])
fig.tight_layout()
# savefig has no `bbox` keyword; `bbox_inches='tight'` is the option that crops the figure.
# NOTE(review): unlike the other figures, this one is saved to the working directory, not PATH["plots"].
fig.savefig('world_insets.eps', bbox_inches='tight')