In [1]:
import geopandas as gpd
import pandas as pd
import sys
import os

import numpy as np
import matplotlib.pyplot as plt

In [2]:
import torch

# Report how many CUDA devices PyTorch can see in this environment.
num_of_gpus = torch.cuda.device_count()
print(num_of_gpus)

1


In [3]:
from tqdm import tqdm_notebook

In [4]:
from scipy.spatial.distance import pdist

In [5]:
# Location of the New York node file, relative to the current working directory.
nycnodes = os.path.join("data","new-york","nodes.geojson")
# Read the GeoJSON node file with geopandas.
nodes = gpd.read_file(nycnodes)

In [6]:
# node_points = np.array([nodes.geometry.x, nodes.geometry.y]).T

In [7]:
# node_points

In [8]:
# distances = pdist(node_points)

In [9]:
# plt.hist(distances, bins=500, color="black")

# plt.title("Histogram of Distances between Images")
# plt.xlabel("Meters")
# plt.ylabel("Count")
# plt.show()

In [10]:
import os
import sys

# Put the parent directory on sys.path (once), presumably so that the
# project-local `src` package can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
already_registered = any(entry == module_path for entry in sys.path)
if not already_registered:
    sys.path.append(module_path)

In [11]:
from src.neighbor_identification import neighbors

In [12]:
import networkx as nx

In [13]:
%load_ext autoreload
%autoreload 2

In [14]:
np.arange(0,10,2)

array([0, 2, 4, 6, 8])

In [15]:
# Shared construction parameters, passed to every graph(...) call in this notebook.
neighbor_radius = 150  # presumably meters (the radius plots are labelled in m) — TODO confirm
building_buffer = 210  # units not visible here; presumably meters as well — TODO confirm

In [16]:
from src.graph_construction import graph
# Build the New York graph. The call returns the graph object together with
# auxiliary "rebuild info"; later cells read rebuild_info['node_data_original'].
nycgraph, rebuild_info = graph(
    "new-york",
    neighbor_radius = neighbor_radius,
    building_buffer = building_buffer,
    test_percent=15  # presumably the percentage held out for the test mask — TODO confirm
)

In [17]:
nycgraph

HeteroData(
  [1mpano[0m={ x=[102218, 3] },
  [1mfootprint[0m={
    x=[13656, 3],
    y=[13656],
    train_mask=[13656],
    test_mask=[13656]
  },
  [1m(footprint, contains, pano)[0m={ edge_index=[2, 2166468] },
  [1m(pano, links, pano)[0m={ edge_index=[2, 8250460] },
  [1m(pano, rev_contains, footprint)[0m={ edge_index=[2, 2166468] }
)

In [18]:
def lcc_vis(node_data, radius_list = np.arange(0,200,5)):
    """Count connected components of the neighbor graph at each radius.

    For every radius in ``radius_list`` the edge list returned by
    ``neighbors(node_data, radius=radius)`` is turned into an undirected
    networkx graph and its connected components are counted. The graph is
    built from the edge list alone, so a node only takes part in the count
    if it appears in at least one edge at that radius.

    Parameters
    ----------
    node_data
        Node table, forwarded unchanged to ``neighbors``.
    radius_list
        Sliceable sequence of radii to evaluate. The first entry (0 in the
        default list) is evaluated but dropped from the returned values.

    Returns
    -------
    tuple
        ``(radius_list[1:], counts[1:])`` where ``counts[i]`` is the number
        of connected components found for ``radius_list[i]``.
    """
    # `tqdm_notebook` is deprecated in tqdm; `tqdm.auto` selects the notebook
    # widget when one is available and falls back to the console bar otherwise.
    from tqdm.auto import tqdm

    connected_components = []

    for radius in tqdm(radius_list):
        edge_list = neighbors(
            node_data,
            radius = radius
        )
        # NOTE(review): edge_list is presumably a (2, E) torch tensor; the
        # transpose turns it into E (source, target) pairs — confirm.
        G = nx.from_edgelist(edge_list.detach().cpu().numpy().T)
        G.remove_nodes_from(list(nx.isolates(G)))
        graph_components = list(nx.connected_components(G))
        connected_components.append(len(graph_components))

    return radius_list[1:], connected_components[1:]

In [None]:
# Build the Austin graph with the same radius/buffer variables used for New York.
austingraph, austin_rebuild_info = graph(
    "austin",
    neighbor_radius = neighbor_radius,
    building_buffer = building_buffer,
    test_percent=15
)

In [None]:
# Build the San Francisco graph with the same radius/buffer variables used for New York.
# NOTE(review): "san-fransisco" is passed to graph() as-is; it presumably has to match
# the spelling of the dataset on disk — confirm before correcting it.
sfgraph, sf_rebuild_info = graph(
    "san-fransisco",
    neighbor_radius = neighbor_radius,
    building_buffer = building_buffer,
    test_percent=15
)

In [None]:
sfgraph

In [None]:
# San Francisco: radii and connected-component counts (lcc_vis drops the first radius).
sX, sY = lcc_vis(sf_rebuild_info['node_data_original'])

In [None]:
# Austin: radii and connected-component counts (lcc_vis drops the first radius).
aX, aY = lcc_vis(austin_rebuild_info['node_data_original'])

In [None]:
# New York: radii and connected-component counts (lcc_vis drops the first radius).
nX, nY = lcc_vis(rebuild_info['node_data_original'])

In [None]:
# Plot the component-count curves of all three cities on one set of axes.
fig, ax = plt.subplots(figsize=(8,6), dpi=500)

ax.step(sX, sY, color="orange", label="San Francisco")
ax.step(aX, aY, color='gray', label='Austin')
ax.step(nX, nY, color='black', label='New York')

# Log-scaled y axis for the component counts.
ax.set_yscale("log")

ax.set_xlabel("Radius of Connection (m)")
ax.set_ylabel("Number of Connected Components")
ax.set_title("Radius vs. Connected Components")

ax.legend()
# Save before plt.show() so the written file contains the finished figure.
fig.savefig("connected-component-history.png")

plt.show()

In [None]:
import copy
import matplotlib
from matplotlib_scalebar.scalebar import ScaleBar

color_thing = plt.cm.jet

fig, (ax1, ax2) = plt.subplots(
    1, 2,
    sharex=True,
    sharey=True,
    figsize=(9, 7),
    dpi=300
)
plt.locator_params(axis='x', nbins=4)

nodedata = rebuild_info['node_data_original']

# One panel per radius: left shows the components at 50 m, right at 150 m.
# Both panels go through the same steps, so they share one loop body.
for panel_ax, test_radius in zip((ax1, ax2), (50, 150)):
    edge_list = neighbors(
        nodedata,
        radius = test_radius
    )
    G = nx.from_edgelist(edge_list.detach().cpu().numpy().T)
    graph_components = list(nx.connected_components(G))

    # Start every node as red; nodes that appear in no edge keep this colour.
    color_arr = np.repeat(["#FF0000"], len(nodedata))

    for c,component in enumerate(graph_components):
        # Spread the components evenly over the colormap.
        cterm = c / len(graph_components)
        # color_arr has a fixed 7-character string dtype, so request "#rrggbb"
        # explicitly. The previous keep_alpha=True produced "#rrggbbaa", which
        # numpy silently truncated back to 7 characters on assignment.
        # NOTE(review): node ids are used as positional indices into
        # color_arr — assumes ids are 0..len(nodedata)-1; confirm.
        color_arr[np.array(list(component))] = matplotlib.colors.to_hex(
            color_thing(cterm),
            keep_alpha=False
        )

    nodedata.plot(markersize=0.2, ax=panel_ax, color=color_arr)
    panel_ax.set_title(f"Connected Components - {test_radius}m Radius")
    # ScaleBar(1) declares one data unit as one meter —
    # NOTE(review): assumes a projected CRS in meters; confirm.
    panel_ax.add_artist(ScaleBar(1))

plt.tight_layout()

plt.savefig('nyc_components.png')
plt.show()

In [None]:
# Undirected networkx graph of the New York pano-to-pano links,
# with any degree-zero nodes dropped afterwards.
pano_links = nycgraph['pano','links','pano']['edge_index']
link_pairs = pano_links.detach().cpu().numpy().T
G = nx.from_edgelist(link_pairs)
isolated_nodes = list(nx.isolates(G))
G.remove_nodes_from(isolated_nodes)

In [None]:
# nx.degree_histogram(G) already returns the node count for each degree value
# (list index == degree). Passing that list to plt.hist would histogram the
# counts, not the degrees, so bin the per-node degrees directly instead.
node_degrees = [degree for _, degree in G.degree()]
plt.hist(node_degrees, bins=50)

plt.title("Degree Histogram - New York")
plt.xlabel("Degree")
plt.ylabel("Number of Nodes")
plt.show()

In [None]:
def graph_statistics(graphobject, graphname):
    """Summarise the ('pano', 'links', 'pano') edges of a graph object.

    The edge index is converted to an undirected networkx graph (isolated
    nodes are not removed) and a handful of global statistics are computed.

    Parameters
    ----------
    graphobject
        Object indexable as ``graphobject['pano','links','pano']['edge_index']``.
    graphname
        Accepted for call-site readability; not used by the computation.

    Returns
    -------
    dict
        Keys "Density", "Edges", "Nodes", "Assortivity" and "Transitivity",
        in that order. Small-world omega (``nx.omega``) is not computed.
    """
    link_index = graphobject['pano','links','pano']['edge_index']
    link_graph = nx.from_edgelist(link_index.detach().cpu().numpy().T)

    stats = {}
    stats["Density"] = nx.density(link_graph)
    stats["Edges"] = nx.number_of_edges(link_graph)
    stats["Nodes"] = nx.number_of_nodes(link_graph)
    stats["Assortivity"] = nx.degree_assortativity_coefficient(link_graph)
    stats["Transitivity"] = nx.transitivity(link_graph)
    return stats

In [None]:
# networkx graph of the New York pano-to-pano links (built from the edge index only).
nyc_nxgraph = nx.from_edgelist(nycgraph['pano','links','pano']['edge_index'].detach().cpu().numpy().T)

In [None]:
## Baseline for comparison: an Erdős–Rényi G(n, p) random graph with the same
## number of nodes as the NYC link graph and the same expected number of edges.
nyc_node_count = nx.number_of_nodes(nyc_nxgraph)
total_possible_edges = (nyc_node_count * (nyc_node_count - 1)) / 2
edge_probability = nx.number_of_edges(nyc_nxgraph) / total_possible_edges

# Size the random graph from nyc_nxgraph itself. nycgraph.num_nodes counts the
# nodes of every node type in the HeteroData (pano and footprint), which does
# not match the node count that edge_probability was derived from.
# fast_gnp_random_graph samples the same G(n, p) model as erdos_renyi_graph but
# runs in O(n + m) instead of O(n^2), which matters for a sparse graph this large.
# The fixed seed makes the baseline statistics reproducible across runs.
random_graph = nx.fast_gnp_random_graph(nyc_node_count, edge_probability, seed=0)

In [None]:
nx.number_of_nodes(random_graph)

In [None]:
nycgraph.num_nodes

In [None]:
# Same statistics (and key order) as graph_statistics, computed on the random baseline.
# Small-world omega (nx.omega) is not computed.
random_metrics = (
    ("Density", nx.density),
    ("Edges", nx.number_of_edges),
    ("Nodes", nx.number_of_nodes),
    ("Assortivity", nx.degree_assortativity_coefficient),
    ("Transitivity", nx.transitivity),
)
randomstats = {label: metric(random_graph) for label, metric in random_metrics}
randomstats

In [None]:
# Link-graph statistics for New York (the name argument is not used by graph_statistics).
nystats = graph_statistics(nycgraph, "New York")
nystats

In [None]:
# Link-graph statistics for San Francisco (the name argument is not used by graph_statistics).
sfstats = graph_statistics(sfgraph, "San Fransisco")
sfstats

In [None]:
# Link-graph statistics for Austin (the name argument is not used by graph_statistics).
austats = graph_statistics(austingraph, "Austin")
austats

In [None]:
# One row per graph; the index labels are given in the same order as the stat dicts.
statsdf = pd.DataFrame([randomstats, nystats, sfstats, austats], index=['Erdős–Rényi','New York','San Fransisco','Austin'])

In [None]:
# Pass the float format to to_latex directly. pandas 2.x renders to_latex
# through Styler, which does not consult the display.float_format option, so
# wrapping the call in pd.option_context has no effect on the output there.
# The explicit argument works on both older and newer pandas versions.
print(statsdf.to_latex(index=False, float_format="{:,.3f}".format))

In [None]:
# Same table again, this time with pandas' default number formatting (row labels omitted).
print(statsdf.to_latex(index=False))