## Setup

In [None]:
# Base libraries
import pandas as pd
import geopandas as gpd
import hdbscan
import umap
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
# NetworkX
import networkx as nx
import osmnx as ox
# OS environment setup
from local_directories import *

## Load data

In [None]:
# Load Leicester's graph from GraphML and keep a projected copy alongside the raw one
leicester_graphml_path = bulk_storage_directory + "/osmnx/raw_excluded/leicester-1864.graphml"
leicester_osmnx_graph = ox.io.load_graphml(leicester_graphml_path)
leicester_osmnx_graph_prj = ox.project_graph(leicester_osmnx_graph)

In [None]:
# Number of nodes in the unprojected graph
leicester_osmnx_graph.number_of_nodes()

In [None]:
# Overview of the projected network: black nodes and thin black edges on white
network_overview_style = dict(
    node_size=5,
    node_color="#000000",
    edge_color="#000000",
    edge_linewidth=0.1,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
ox.plot_graph(leicester_osmnx_graph_prj, **network_overview_style)

In [None]:
# Convert graph to dataframe version: one row per node, holding the node's
# attributes plus its OSMnx node id (as int) in the `osmnx_node_id` column.
#
# The records are collected first and the frame is built in a single call;
# appending with pd.concat inside the loop re-copies the frame on every node.
# Each record is a fresh dict, so the graph's own node attributes are not modified,
# and the resulting frame has a regular 0..n-1 index.
leicester_node_records = [
    {**node_attributes, "osmnx_node_id": int(node)}
    for node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
leicester_osmnx_graph_prj_df = pd.DataFrame(leicester_node_records)
leicester_osmnx_graph_prj_df.head()

In [None]:
# Load Leicester's base stats (per-node ego-graph statistics) and align the id column
# name with the other tables in this notebook
leicester_basic_stats_path = this_repo_directory + "/data/leicester-1864_stats_egograph_basic_dist500.csv"
leicester_osmnx_basic_stats = (
    pd.read_csv(leicester_basic_stats_path)
    .rename(columns={"node_id": "osmnx_node_id"})
    # Drop NAs created when ego-graph has less than 8 nodes
    # (only rows with a missing node id are removed here)
    .dropna(subset=["osmnx_node_id"])
)
leicester_osmnx_basic_stats.head()

In [None]:
# Load Leicester's embeddings (one row per node, keyed by `osmnx_node_id`)
leicester_emb_path = this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-8-1.csv"
leicester_emb_df = pd.read_csv(leicester_emb_path)
leicester_emb_df.head()

In [None]:
# Embedding columns only (node id removed), used as input to the dimensionality reduction
leicester_emb_only = leicester_emb_df.drop(columns=['osmnx_node_id'])
leicester_emb_only.shape

## Explore embeddings

In [None]:
# https://umap-learn.readthedocs.io/en/latest/api.html
# UMAP is stochastic: without a fixed seed every run produces a different layout,
# while cells further down refer to specific node ids picked from the projection.
# Fixing the seed makes Restart & Run All reproduce the same coordinates.
UMAP_RANDOM_STATE = 42
reducer = umap.UMAP(
        n_neighbors=256,
        min_dist=0.0,
        # Two components are required below, where the result is labelled UMAP0/UMAP1
        n_components=2,
        metric='cosine',
        random_state=UMAP_RANDOM_STATE
    )
reduced_embedding = reducer.fit_transform(leicester_emb_only)

In [None]:
# Scatter of the two UMAP components, taken straight from the reducer output
grid_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig = px.scatter(
    leicester_emb_df,
    x=reduced_embedding[:, 0],
    y=reduced_embedding[:, 1],
    width=800,
    height=800,
)
fig.update_layout({"plot_bgcolor": "#ffffff"})
# Same light-grey grid and zero line on both axes
fig.update_xaxes(**grid_axis_style)
fig.update_yaxes(**grid_axis_style)
fig.show()

In [None]:
# Attach the UMAP coordinates to the embeddings table as columns UMAP0 / UMAP1.
# The rows of `reduced_embedding` are in the same order as the rows of
# `leicester_emb_df`, so the new frame is built directly on that index
# (reindexing a fresh 0..n-1 frame only lines up when the target index is also 0..n-1).
umap_columns = ['UMAP0', 'UMAP1']
leicester_umap_df = pd.DataFrame(
    reduced_embedding,
    columns=umap_columns,
    index=leicester_emb_df.index
)
# Dropping any existing UMAP columns first keeps the cell safe to re-run:
# otherwise a second execution would append duplicate UMAP0/UMAP1 columns.
leicester_emb_df = pd.concat([
    leicester_emb_df.drop(columns=umap_columns, errors="ignore"),
    leicester_umap_df
], axis=1)
leicester_emb_df.head()

In [None]:
# Same scatter as before, now read from the UMAP0/UMAP1 columns so the node id
# can be shown on hover
grid_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig = px.scatter(
    leicester_emb_df,
    x="UMAP0",
    y="UMAP1",
    hover_data=['osmnx_node_id'],
    width=800,
    height=800,
)
fig.update_layout({"plot_bgcolor": "#ffffff"})
fig.update_xaxes(**grid_axis_style)
fig.update_yaxes(**grid_axis_style)
fig.show()

In [None]:
# Copy the UMAP coordinates onto the projected graph as node attributes
# UMAP0 / UMAP1; nodes without a row in the embeddings table get None for both.
#
# A node-id -> (UMAP0, UMAP1) lookup is built once, instead of filtering the whole
# embeddings frame several times per node. Converting each scalar with float() also
# avoids calling float() on a 1-element array, which NumPy has deprecated.
# If a node id appeared more than once in the table, the last row would be used.
umap_by_node = {
    node_id: (float(umap0), float(umap1))
    for node_id, umap0, umap1 in zip(
        leicester_emb_df["osmnx_node_id"],
        leicester_emb_df["UMAP0"],
        leicester_emb_df["UMAP1"],
    )
}
for node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True):
    node_attributes["UMAP0"], node_attributes["UMAP1"] = umap_by_node.get(node, (None, None))

In [None]:
# Street network with each node coloured by its UMAP0 value
umap0_per_node = [
    node_attributes["UMAP0"]
    for _, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=umap0_per_node,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)

In [None]:
# Street network with each node coloured by its UMAP1 value
umap1_per_node = [
    node_attributes["UMAP1"]
    for _, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=umap1_per_node,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)

## Correlations with basic stats

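Checking correlations between the first two embedding dimensions (EMB000 and EMB001), their pooled counterparts, the basic ego-graph statistics from OSMnx, and the network-wide closeness and betweenness centralities. No clear correlation found.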

In [None]:
# Network-wide closeness centrality, computed on the unprojected graph with
# networkx's default arguments, as a table indexed by node id
closeness_by_node = nx.closeness_centrality(leicester_osmnx_graph)
leicester_closeness_centrality = pd.DataFrame.from_dict(
    closeness_by_node, orient='index', columns=['closeness_networkwide']
)
# Expose the node id as a regular column too, so the table can be merged on it
leicester_closeness_centrality = leicester_closeness_centrality.assign(
    osmnx_node_id=leicester_closeness_centrality.index
)
leicester_closeness_centrality.head()

In [None]:
# Network-wide betweenness centrality, computed on the unprojected graph with
# networkx's default arguments, as a table indexed by node id
betweenness_by_node = nx.betweenness_centrality(leicester_osmnx_graph)
leicester_betweenness_centrality = pd.DataFrame.from_dict(
    betweenness_by_node, orient='index', columns=['betweenness_networkwide']
)
# Expose the node id as a regular column too, so the table can be merged on it
leicester_betweenness_centrality = leicester_betweenness_centrality.assign(
    osmnx_node_id=leicester_betweenness_centrality.index
)
leicester_betweenness_centrality.head()

In [None]:
# Assemble one table per node: selected basic stats, both network-wide centralities
# and the first two embedding dimensions. Every merge is an inner join on the node id,
# so only nodes present in all four tables are kept.
basic_stats_columns = [
    "osmnx_node_id", "n", "m", "k_avg", "edge_length_total", "edge_length_avg",
    "streets_per_node_avg", "intersection_count", "street_length_total",
    "street_segment_count", "street_length_avg", "circuity_avg",
]
tables_to_join = (
    leicester_closeness_centrality,
    leicester_betweenness_centrality,
    leicester_emb_df[["osmnx_node_id", "EMB000", "EMB001"]],
)
leicester_pairplot_df = leicester_osmnx_basic_stats[basic_stats_columns]
for table_to_join in tables_to_join:
    leicester_pairplot_df = leicester_pairplot_df.merge(table_to_join, on="osmnx_node_id")

In [None]:
# Add the pooled embeddings, renamed so they do not clash with EMB000/EMB001
# NOTE(review): this file is tagged model v0-5 while the embeddings loaded earlier
# are tagged v0-8-1 — confirm the two are meant to be compared.
leicester_emb_pooled_path = this_repo_directory + "/data/leicester-1864_emb-pool_gnnuf_model_v0-5.csv"
leicester_emb_pooled_df = pd.read_csv(leicester_emb_pooled_path).rename(
    columns={"EMB000":"EMB000pooled", "EMB001":"EMB001pooled"}
)
leicester_pairplot_df_withpooled = leicester_pairplot_df.merge(
    leicester_emb_pooled_df,
    on="osmnx_node_id"
)

In [None]:
# Kendall's tau between every pair of columns (node id excluded).
# Left as the cell's last expression so the notebook renders the full matrix as a
# table; print() would emit wrapped plain text for a frame this wide.
leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]).corr(method="kendall")

In [None]:
# Double-checking difference with Spearman's rho
# Left as the cell's last expression so the notebook renders the full matrix as a
# table; print() would emit wrapped plain text for a frame this wide.
leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]).corr(method="spearman")

In [None]:
# Pairwise 2D histograms of every variable in the merged table (node id excluded)
pairplot_variables = leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"])
sns.pairplot(pairplot_variables, kind="hist")

In [None]:
# Save the merged table (stats, centralities, embeddings, pooled embeddings) for reuse
# NOTE(review): the file name says model v0-5, but EMB000/EMB001 in this table come
# from the v0-8-1 embeddings file — confirm the intended name.
leicester_pairplot_export_path = this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5_with-node-and-basic-stats.csv"
leicester_pairplot_df_withpooled.to_csv(leicester_pairplot_export_path, index=False)

## Check specific nodes

Checking the ego-network of the nodes with the most extreme values in each direction (min and max) for both UMAP0 and UMAP1.

In [None]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 6782625866]

In [None]:
ego_6782625866 = nx.generators.ego_graph(
    leicester_osmnx_graph, 6782625866,
    radius=max_distance, undirected=True, distance="length")
ego_6782625866_prj = ox.project_graph(ego_6782625866)
ox.plot_graph(
    ego_6782625866_prj,
    node_size=[20 if node == 6782625866 else 5 for node in ego_6782625866_prj.nodes],
    node_color=["#e41a1c" if node == 6782625866 else "#ffffff" for node in ego_6782625866_prj.nodes],
    figsize=(5, 5)
)

In [None]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 354554417]

In [None]:
ego_354554417 = nx.generators.ego_graph(
    leicester_osmnx_graph, 354554417,
    radius=max_distance, undirected=True, distance="length")
ego_354554417_prj = ox.project_graph(ego_354554417)
ox.plot_graph(
    ego_354554417_prj,
    node_size=[20 if node == 354554417 else 5 for node in ego_354554417_prj.nodes],
    node_color=["#e41a1c" if node == 354554417 else "#ffffff" for node in ego_354554417_prj.nodes],
    figsize=(5, 5)
)

In [None]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 1179199412]

In [None]:
ego_1179199412 = nx.generators.ego_graph(
    leicester_osmnx_graph, 1179199412,
    radius=max_distance, undirected=True, distance="length")
ego_1179199412_prj = ox.project_graph(ego_1179199412)
ox.plot_graph(
    ego_1179199412_prj,
    node_size=[20 if node == 1179199412 else 5 for node in ego_1179199412_prj.nodes],
    node_color=["#e41a1c" if node == 1179199412 else "#ffffff" for node in ego_1179199412_prj.nodes],
    figsize=(5, 5)
)

In [None]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 2858142815]

In [None]:
ego_2858142815 = nx.generators.ego_graph(
    leicester_osmnx_graph, 2858142815,
    radius=max_distance, undirected=True, distance="length")
ego_2858142815_prj = ox.project_graph(ego_2858142815)
ox.plot_graph(
    ego_2858142815_prj,
    node_size=[20 if node == 2858142815 else 5 for node in ego_2858142815_prj.nodes],
    node_color=["#e41a1c" if node == 2858142815 else "#ffffff" for node in ego_2858142815_prj.nodes],
    figsize=(5, 5)
)

In [None]:
leicester_emb_df[leicester_emb_df["osmnx_node_id"] == 296162322]

In [None]:
ego_296162322 = nx.generators.ego_graph(
    leicester_osmnx_graph, 296162322,
    radius=max_distance, undirected=True, distance="length")
ego_296162322_prj = ox.project_graph(ego_296162322)
ox.plot_graph(
    ego_296162322_prj,
    node_size=[20 if node == 296162322 else 5 for node in ego_296162322_prj.nodes],
    node_color=["#e41a1c" if node == 296162322 else "#ffffff" for node in ego_296162322_prj.nodes],
    figsize=(5, 5)
)

## Explore patterns

In [None]:
# Work on an independent copy so the pattern columns added below do not end up
# in `leicester_emb_df`
leicester_emb_patters_df = leicester_emb_df.copy(deep=True)

In [None]:
# from sklearn.cluster import DBSCAN
# leicester_emb_df_clust = leicester_emb_df[["UMAP0", "UMAP1"]].dropna()
# clust = DBSCAN(eps=1.0, min_samples=300)
# leicester_emb_patters_df["clust"] = clust.fit_predict(leicester_emb_df_clust)
# leicester_emb_patters_df["clust"].nunique()

In [None]:
# Cluster the nodes on their two UMAP coordinates with HDBSCAN.
leicester_emb_df_clust = leicester_emb_df[["UMAP0", "UMAP1"]].dropna()
clusterer = hdbscan.HDBSCAN(min_cluster_size=100, gen_min_span_tree=True)
# The labels are aligned back by index rather than by position: if dropna() removed
# any row, a positional assignment would fail with a length mismatch.
leicester_cluster_labels = pd.Series(
    clusterer.fit_predict(leicester_emb_df_clust),
    index=leicester_emb_df_clust.index
)
# Rows that could not be clustered (missing UMAP values) get -1, the label HDBSCAN
# itself uses for noise, so the column stays integer-typed.
leicester_emb_patters_df["clust"] = leicester_cluster_labels.reindex(
    leicester_emb_patters_df.index, fill_value=-1
)
leicester_emb_patters_df["clust"].nunique()

In [None]:
colorbrewer_set1 = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33", "#a65628", "#f781bf", "#999999"]
colorbrewer_set12p = ["#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a", "#ffff99", "#b15928"]
# The plotting and mapping cells further down single out nodes whose colour equals
# colorbrewer_set1[-1], so noise points (label -1) must get exactly that colour.
# Indexing the 12-colour palette with -1 gave them its last colour instead, which
# never matched that test and could also coincide with the colour of cluster 11.
clust_noise_colour = colorbrewer_set1[-1]


def clust_label_to_colour(label):
    """Return the hex colour for a cluster label; negative (noise) labels get grey."""
    if label < 0:
        return clust_noise_colour
    # Wrap around the palette instead of raising IndexError beyond 12 clusters
    return colorbrewer_set12p[int(label) % len(colorbrewer_set12p)]


leicester_emb_patters_df["clust_colour"] = leicester_emb_patters_df["clust"].apply(clust_label_to_colour)
leicester_emb_patters_df.head()

In [None]:
# Static scatter of the UMAP coordinates, coloured by cluster
fig, ax = plt.subplots(figsize=(7, 7))
ax.set_facecolor("white")
ax.scatter(
    leicester_emb_patters_df["UMAP0"],
    leicester_emb_patters_df["UMAP1"],
    c=leicester_emb_patters_df["clust_colour"],
    s=5,
    edgecolors='black',
    linewidth=0.1,
)
ax.set_xlabel("Embeddings first dimension")
ax.set_ylabel("Embeddings second dimension")
plt.show()

In [None]:
# Interactive scatter of the first two embedding dimensions, coloured by cluster
grid_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
cluster_trace = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker=dict(color=leicester_emb_patters_df["clust_colour"]),
)
fig = go.Figure()
fig.add_trace(cluster_trace)
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
fig.update_xaxes(**grid_axis_style)
fig.update_yaxes(**grid_axis_style)
fig.show()

In [None]:
def bivariate_colour(x, limits):
    """Return the colour of a 3x3 bivariate palette for a pair of values.

    Each value is placed in one of three classes using its own pair of limits
    (both limits are inclusive upper bounds), and the two classes together select
    one of nine colours.

    Parameters
    ----------
    x : sequence
        The two values, `x[0]` and `x[1]`.
    limits : 2x2 array indexed as `limits[i, j]`
        `limits[0, 0]` and `limits[0, 1]` are the lower and upper limits for `x[0]`;
        `limits[1, 0]` and `limits[1, 1]` are those for `x[1]`.

    Returns
    -------
    str or None
        Hex colour string, or None when either value is missing (None or NaN).
    """
    first_value, second_value = x[0], x[1]
    # pd.isna covers NaN as well as None. Values read from a DataFrame are NaN when
    # missing, and NaN fails every `<=` test, so it used to fall through to the
    # darkest colour instead of being reported as missing.
    if pd.isna(first_value) or pd.isna(second_value):
        return None

    # Rows: class of x[0]; columns: class of x[1].
    # Alternative palette previously kept for reference:
    #   "#e8e8e8", "#cbb8d7", "#9972af" / "#e4d9ac", "#c8ada0", "#976b82" /
    #   "#c8b35a", "#af8e53", "#804d36"
    palette = (
        ("#e8e8e8", "#e4acac", "#c85a5a"),
        ("#b0d5df", "#ad9ea5", "#985356"),
        ("#64acbe", "#627f8c", "#574249"),
    )

    def value_class(value, lower_limit, upper_limit):
        # 0: up to the lower limit, 1: up to the upper limit, 2: above it
        if value <= lower_limit:
            return 0
        if value <= upper_limit:
            return 1
        return 2

    first_class = value_class(first_value, limits[0, 0], limits[0, 1])
    second_class = value_class(second_value, limits[1, 0], limits[1, 1])
    return palette[first_class][second_class]

# Tercile limits of the first two embedding dimensions, arranged as
# [[EMB000 1/3, EMB000 2/3], [EMB001 1/3, EMB001 2/3]]
leicester_emb_quantiles = (
    leicester_emb_df[["EMB000", "EMB001"]]
    .quantile([1/3, 2/3])
    .to_numpy()
    .T
)


def _row_bivariate_colour(row):
    # Colour for one row, from its EMB000/EMB001 pair and the tercile limits
    return bivariate_colour([row["EMB000"], row["EMB001"]], leicester_emb_quantiles)


leicester_emb_patters_df["bivariate_colour"] = leicester_emb_patters_df.apply(
    _row_bivariate_colour, axis=1
)
leicester_emb_patters_df.head()

In [None]:
# Static scatter of the first two embedding dimensions, coloured with the
# bivariate palette
fig, ax = plt.subplots(figsize=(7, 7))
ax.set_facecolor("white")
ax.scatter(
    leicester_emb_patters_df["EMB000"],
    leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["bivariate_colour"],
    s=10,
    edgecolors='black',
    linewidth=0.1,
)
ax.set_xlabel("Embeddings first dimension")
ax.set_ylabel("Embeddings second dimension")
plt.show()

In [None]:
# Interactive scatter of the first two embedding dimensions, coloured with the
# bivariate palette
grid_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
bivariate_trace = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker=dict(color=leicester_emb_patters_df["bivariate_colour"]),
)
fig = go.Figure()
fig.add_trace(bivariate_trace)
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
fig.update_xaxes(**grid_axis_style)
fig.update_yaxes(**grid_axis_style)
fig.show()

In [None]:
# Copy of the projected graph carrying, per node, the colours and marker size used
# by the map plots below.
leicester_osmnx_bivariate = leicester_osmnx_graph_prj.copy()

# Node id -> {"clust_colour": ..., "bivariate_colour": ...}, built once instead of
# filtering the whole frame for every node. When a node id has several rows the
# first one is kept.
colours_by_node = (
    leicester_emb_patters_df
    .drop_duplicates(subset="osmnx_node_id")
    .set_index("osmnx_node_id")[["clust_colour", "bivariate_colour"]]
    .to_dict("index")
)

for node, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    node_colours = colours_by_node.get(node)
    # Both colour attributes are set on every node: a later cell reads
    # "bivariate_colour" from the graph and raised KeyError when only
    # "clust_colour" was stored.
    if node_colours is None:
        # Nodes without embeddings: small black markers
        node_attributes["bivariate_colour"] = "#000000"
        node_attributes["clust_colour"] = "#000000"
        node_attributes["node_size"] = 1
    else:
        node_attributes["bivariate_colour"] = node_colours["bivariate_colour"]
        node_attributes["clust_colour"] = node_colours["clust_colour"]
        node_attributes["node_size"] = 7

In [None]:
# Street network with nodes coloured by cluster. Nodes whose colour equals the last
# entry of colorbrewer_set1 are drawn at size 1; all others use their stored size.
set1_last_colour = colorbrewer_set1[-1]
cluster_node_colours = []
cluster_node_sizes = []
for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    cluster_node_colours.append(node_attributes["clust_colour"])
    cluster_node_sizes.append(
        node_attributes["node_size"] if node_attributes["clust_colour"] != set1_last_colour else 1
    )
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=cluster_node_colours,
    node_size=cluster_node_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)

In [None]:
# ox.plot_graph(
#     leicester_osmnx_bivariate,
#     node_color=[leicester_osmnx_bivariate.nodes[node]["clust_colour"] for node in leicester_osmnx_bivariate.nodes],
#     node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*8 if leicester_osmnx_bivariate.nodes[node]["clust_colour"]!=colorbrewer_set1[-1] else 8 for node in leicester_osmnx_bivariate.nodes],
#     bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
#     figsize=(48, 48))

In [None]:
# Street network with nodes coloured by the bivariate palette, at their stored size
bivariate_node_colours = []
bivariate_node_sizes = []
for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    bivariate_node_colours.append(node_attributes["bivariate_colour"])
    bivariate_node_sizes.append(node_attributes["node_size"])
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=bivariate_node_colours,
    node_size=bivariate_node_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)

In [None]:
# ox.plot_graph(
#     leicester_osmnx_bivariate,
#     node_color=[leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] for node in leicester_osmnx_bivariate.nodes],
#     node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*2 for node in leicester_osmnx_bivariate.nodes],
#     bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
#     figsize=(24, 24))

In [None]:
# Point layer with one row per graph node, joined with the pattern columns and the
# basic stats. `geopandas` is imported as `gpd` in the import cell at the top of the
# notebook rather than here, so all dependencies are visible in one place.
# Geometry is built from the `lon`/`lat` columns, hence the EPSG:4326 CRS.
leicester_node_points = gpd.points_from_xy(
    leicester_osmnx_graph_prj_df.lon,
    leicester_osmnx_graph_prj_df.lat
)
# Left joins keep every graph node, including those without embeddings or stats
leicester_gdf = (
    gpd.GeoDataFrame(
        leicester_osmnx_graph_prj_df,
        geometry=leicester_node_points,
        crs="EPSG:4326"
    )
    .merge(leicester_emb_patters_df, on='osmnx_node_id', how='left')
    .merge(leicester_osmnx_basic_stats, on='osmnx_node_id', how='left')
)
leicester_gdf.head()

In [None]:
# Interactive map of the nodes coloured by cluster. Rows whose colour equals the last
# entry of colorbrewer_set1, and rows without embeddings, are left out.
# The "Stamen Toner" basemap is no longer bundled with recent folium releases, so a
# built-in light basemap is used instead.
leicester_gdf[leicester_gdf["clust_colour"]!=colorbrewer_set1[-1]].dropna(subset=["EMB000"]).explore(
    color="clust_colour",
    marker_kwds={"radius": 7}, style_kwds={"stroke": False},
    tiles="CartoDB positron"
)

In [None]:
# Interactive map of the nodes coloured with the bivariate palette. Rows coloured
# "#000000" and rows without embeddings are left out.
# The "Stamen Toner" basemap is no longer bundled with recent folium releases, so a
# built-in light basemap is used instead.
leicester_gdf[leicester_gdf["bivariate_colour"]!="#000000"].dropna(subset=["EMB000"]).explore(
    color="bivariate_colour",
    marker_kwds={"radius": 7}, style_kwds={"stroke": False},
    legend=True,
    tiles="CartoDB positron"
)