# Analysis of distribution of shape statistics

**Part 2: plot**

This notebook visualizes the results of the peak-finding procedure from the previous notebook, `04a_peaks_compute`. 

In [2]:
import os
os.environ['USE_PYGEOS'] = '0'
import json
import geopandas
import pandas
import numpy
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
from palettable.cartocolors.qualitative import Bold_6

from scipy.signal import find_peaks
from scipy.stats import gaussian_kde

Set default plotting theme and add color dictionary for continents.

In [3]:
# Matplotlib rc overrides applied on top of the seaborn "ticks" style:
# no forced patch edges, no top/right spines, grid switched on.
theme_rc = {
    "patch.force_edgecolor": False,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "axes.grid": True,
}

# Global plotting theme for the whole notebook; the default colour cycle is
# the Bold_6 qualitative palette.
sns.set_theme(
    context="paper",
    style="ticks",
    palette=Bold_6.hex_colors,
    rc=theme_rc,
)

# Continent name -> index into the 6-colour palette. The index is simply the
# position of the continent in this (alphabetical) tuple.
continent_names = (
    'Africa',
    'Asia',
    'Europe',
    'North America',
    'Oceania',
    'South America',
)
coldict = {name: position for position, name in enumerate(continent_names)}

Load results from previous notebook

In [4]:
# Load the peak-finding results written by the previous notebook.
# The context manager closes the file handle as soon as parsing is done
# (a bare `json.load(open(...))` leaves the handle open until it is
# garbage-collected).
with open("../results/04_peaks_results.json") as results_file:
    results = json.load(results_file)

# Metric names; each one is used as a key of `results[city]` further down.
options = [
    "circular_compactness_index",
    "isoperimetric_quotient_index",
    "isoareal_quotient_index",
    "radii_ratio_index",
    "diameter_ratio_index",
]

Load the cities / urban blocks data and combine them to a single GeoDataFrame.

In [5]:
def _read_fua_polygons(fua_row):
    """Read the polygons of one sampled urban area and tag them with its metadata.

    ``fua_row`` is one row of the sample table; its ``eFUA_ID`` selects the
    parquet folder, and ``continent``, ``Cntry_name`` and ``eFUA_name`` are
    copied onto every polygon as ``continent``, ``country`` and ``name``.
    """
    polygons = geopandas.read_parquet(f"../data/{int(fua_row.eFUA_ID)}/polygons/")
    polygons["continent"] = fua_row.continent
    polygons["country"] = fua_row.Cntry_name
    polygons["name"] = fua_row.eFUA_name
    # NOTE(review): the CRS is dropped, presumably so that frames stored in
    # different projections can be concatenated - confirm.
    polygons.crs = None
    return polygons


sample = geopandas.read_parquet("../data/sample.parquet")

# One GeoDataFrame per sampled city, then a single combined frame with a
# fresh 0..n-1 index.
all_poly = [_read_fua_polygons(fua_row) for _, fua_row in sample.iterrows()]
all_poly_data = pandas.concat(all_poly).reset_index(drop=True)


Plot settings and subfolders

In [6]:
# Output folders for the figures. All of them live under ../plots; the last
# entry previously read "..plots/cities/detail" (missing slash), which
# created a stray "..plots" folder next to the notebook instead.
mydirs = [
    "../plots/peaks/metrics/comparison",
    "../plots/peaks/metrics/circular_compactness",
    "../plots/cities/birdview",
    "../plots/cities/detail",
]

# exist_ok makes the cell safe to re-run
for mydir in mydirs:
    os.makedirs(mydir, exist_ok=True)

# Turn on matplotlib's automatic layout adjustment for figures created
# without an explicit `layout` argument.
rcParams['figure.autolayout'] = True


Plot peak results for all 5 metrics

In [34]:
def _draw_kde_peaks(ax, city_result, option_label, pdf_color):
    """Draw one KDE curve with its detected peaks and valleys on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Panel to draw on.
    city_result : dict
        Result entry for one city and one metric. The keys read here are
        "pdf", "linspace_params" (with "start", "stop", "num"), "peaks"
        and "valleys".
    option_label : str
        Human-readable metric name used in the title and x label.
    pdf_color : color
        Colour of the density curve.
    """
    my_pdf = numpy.array(city_result["pdf"])

    # rebuild the x grid the density values belong to
    grid_params = city_result["linspace_params"]
    mylinspace = numpy.linspace(
        start=grid_params["start"],
        stop=grid_params["stop"],
        num=grid_params["num"],
    )

    # "peaks" / "valleys" are used as positions into both the grid and the pdf
    peak_idx = city_result["peaks"]
    valley_idx = city_result["valleys"]

    # dotted vertical lines: black at peaks (local max), red at valleys (local min)
    for positions, line_color in ((peak_idx, "black"), (valley_idx, "red")):
        ax.vlines(
            x=mylinspace[positions],
            ymin=0,
            ymax=1,
            colors=line_color,
            linestyles="dotted",
            alpha=1,
            linewidth=1,
        )

    # density curve, kept below the markers
    ax.plot(mylinspace, my_pdf, color=pdf_color, zorder=0)

    # dots where the vertical lines cross the density curve
    for positions, dot_color, dot_zorder in (
        (peak_idx, "black", 1),
        (valley_idx, "red", 2),
    ):
        ax.scatter(
            x=mylinspace[positions],
            y=my_pdf[positions],
            color=dot_color,
            s=8,
            alpha=1,
            zorder=dot_zorder,
        )

    ax.set_title(f"Peak detection in KDE based on {option_label}")
    ax.set_xlabel(f"log({option_label}) per area unit")
    ax.set_ylabel("Probability density")
    ax.set_xlim([-20, 20])
    ax.set_ylim([0, 1])


# Loop-invariant work, done once instead of once per city:
# - continent of every city (same value as taking the max of the continent
#   column over that city's rows, without re-filtering the full frame each time)
# - the six palette colours
# - the font size of the figure title
continent_by_city = all_poly_data.groupby("name")["continent"].max()
palette = sns.color_palette(n_colors=6)
plt.rc('figure', titlesize=24)

# one panel per metric (five with the current `options`, i.e. a 20x4 figure)
n_panels = len(options)

for city in all_poly_data.name.unique():

    fig, axes = plt.subplots(
        1, n_panels,
        figsize=(4 * n_panels, 4),
        sharex=True,
        sharey=True,
        squeeze=False,  # always a 2-D array, even with a single metric
        layout="constrained")

    # get continent color from dict
    continent_color = palette[coldict[continent_by_city[city]]]

    for ax, option in zip(axes[0], options):
        _draw_kde_peaks(
            ax,
            results[city][option],
            option.replace("_", " "),
            continent_color,
        )

    fig.suptitle(city)

    fig.savefig(
        f"../plots/peaks/metrics/comparison/{city}.png",
        dpi=300)

    # close this figure explicitly so figures do not pile up in memory
    plt.close(fig)


Plot only the circular compactness index (the metric we choose to work with from now on) for each city.

In [39]:
# Everything that does not change from city to city is set up once:
# the metric, its label, the palette, the title font size and the
# city -> continent lookup (same value as taking the max of the continent
# column over that city's rows, without re-filtering the full frame each time).
option = "circular_compactness_index"
option_label = option.replace("_", " ")
continent_by_city = all_poly_data.groupby("name")["continent"].max()
palette = sns.color_palette(n_colors=6)
plt.rc('figure', titlesize=24)

for city in all_poly_data.name.unique():

    # initiate plot
    fig, ax = plt.subplots(
        1, 1,
        figsize=(4, 4),
        layout="constrained")

    # get continent color from dict
    continent_color = palette[coldict[continent_by_city[city]]]

    # result entry for this city and metric, looked up once
    city_result = results[city][option]
    my_pdf = numpy.array(city_result["pdf"])

    # rebuild the x grid the density values belong to
    grid_params = city_result["linspace_params"]
    mylinspace = numpy.linspace(
        start=grid_params["start"],
        stop=grid_params["stop"],
        num=grid_params["num"])

    # "peaks" / "valleys" are used as positions into both the grid and the pdf
    peak_idx = city_result["peaks"]
    valley_idx = city_result["valleys"]

    # plot vlines at identified peak (local max) points
    ax.vlines(
        x=mylinspace[peak_idx],
        ymin=0,
        ymax=1,
        colors="black",
        linestyles="dotted",
        alpha=1,
        linewidth=1)

    # plot vlines at identified valley (local min) points
    ax.vlines(
        x=mylinspace[valley_idx],
        ymin=0,
        ymax=1,
        colors="red",
        linestyles="dotted",
        alpha=1,
        linewidth=1)

    # plot pdf below the markers
    ax.plot(mylinspace, my_pdf, color=continent_color, zorder=0)

    # add scatter points at pdf/vline intersections
    ax.scatter(
        x=mylinspace[peak_idx],
        y=my_pdf[peak_idx],
        color="black",
        s=8,
        alpha=1,
        zorder=1)
    ax.scatter(
        x=mylinspace[valley_idx],
        y=my_pdf[valley_idx],
        color="red",
        s=8,
        alpha=1,
        zorder=2)

    ax.set_title(f"Peak detection in KDE based on {option_label}")
    ax.set_xlabel(f"log({option_label}) per area unit")
    ax.set_ylabel("Probability density")
    ax.set_xlim([-20, 20])
    ax.set_ylim([0, 1])

    fig.suptitle(city)

    fig.savefig(
        f"../plots/peaks/metrics/circular_compactness/{city}.png",
        dpi=300)

    # close this figure explicitly so figures do not pile up in memory
    plt.close(fig)

Bird's-eye-view plots of detected graph face artifacts, using the circular compactness index threshold as the classification criterion

In [14]:
option = "circular_compactness_index"

# font size of the figure title; identical for every city, so set it once
plt.rc('figure', titlesize=50)

for city in all_poly_data.name.unique():

    threshold = results[city][option]["threshold"]

    # NOTE(review): any falsy threshold is treated as "not found". The sentinel
    # stored by the previous notebook is not visible here; a genuine threshold
    # of exactly 0 would also be skipped - confirm this is intended.
    if threshold:

        # get block data for current city
        fua = all_poly_data[all_poly_data.name == city]
        metric_values = fua[option]

        # initialize figure
        fig, ax = plt.subplots(1, 1, figsize=(20, 20))

        # plot true urban blocks (metric above the threshold)
        fua[metric_values > threshold].plot(
            ax=ax,
            color="grey",
            alpha=0.2)

        # plot street artifacts (metric at or below the threshold)
        fua[metric_values <= threshold].plot(
            ax=ax,
            color="red",
            alpha=1)

        ax.set_axis_off()

        # Use the city name as stored: str.capitalize() lowercases everything
        # after the first character ("Addis Ababa" -> "Addis ababa").
        fig.suptitle(city)
        fig.savefig(f"../plots/cities/birdview/{city}.png", dpi=300, bbox_inches="tight")

        # close this figure explicitly so figures do not pile up in memory
        plt.close(fig)
        print(f"Plotted {city}")
    else:  # no threshold available for this city: skip the plot and report it
        print(f"No threshold found for {city}")

No threshold found for Gonda
No threshold found for Chongqing
No threshold found for Comilla
No threshold found for Abbottabad
Plotted Luohe
Plotted Lucknow
No threshold found for Wuhan
No threshold found for Nantong
Plotted Jaipur
No threshold found for Yongkang
Plotted Aleppo
No threshold found for Xingtai
No threshold found for Karachi
No threshold found for Qinhuangdao
Plotted Tbilisi
Plotted Mandalay
Plotted Kabul
Plotted Basra
Plotted Seoul
No threshold found for Guangzhou
No threshold found for Dhaka
Plotted Agra
No threshold found for Weifang
No threshold found for Jombang
Plotted Semarang
Plotted Accra
Plotted Oran
No threshold found for Al-Zaqaziq‎
Plotted Conakry
No threshold found for Tanta
Plotted Addis Ababa
Plotted Abidjan
Plotted Amaigbo
Plotted Ouagadougou
Plotted Kananga
No threshold found for Ibadan
Plotted Mombasa
Plotted Mogadishu
Plotted Tripoli
Plotted Cape Town
No threshold found for Fez
Plotted Khartoum
Plotted Maiduguri
Plotted Abuja
No threshold found for Aga