# Part 09: Compare the edge differences between v1.0 and v2.0 of the added edges
michael babb  
2024 11 24

In [None]:
# standard
import os

In [None]:
# external
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point

In [None]:
# custom
import run_constants as rc
from utils import *

# load the working seattle street network data

In [None]:
# full path to the v1 missing segments geopackage in the output folder
v1_file_name = 'missing_segments_v1.gpkg'
fpn = os.path.join(rc.OUTPUT_FILE_PATH, v1_file_name)

In [None]:
# read the v1 segments, then keep only the added edges (street_status of 2)
gdf_v1 = gpd.read_file(filename=fpn)
added_mask_v1 = gdf_v1['street_status'] == 2
gdf_v1 = gdf_v1.loc[added_mask_v1, :].copy()

In [None]:
# full path to the v2 missing segments geopackage in the output folder
v2_file_name = 'missing_segments_v2.gpkg'
fpn = os.path.join(rc.OUTPUT_FILE_PATH, v2_file_name)

In [None]:
# read the v2 segments, then keep only the rows with a street_status of 2
# (the same filter that is applied to v1)
gdf_v2 = gpd.read_file(filename=fpn)
added_mask_v2 = gdf_v2['street_status'] == 2
gdf_v2 = gdf_v2.loc[added_mask_v2, :].copy()

In [None]:
# how many v1 rows are left after the filter?
gdf_v1.loc[:, 'street_status'].value_counts()

In [None]:
# how many v2 rows are left after the filter?
gdf_v2.loc[:, 'street_status'].value_counts()

In [None]:
# so, there are the same number of edges... but how have the edges changed?
# the three things we need are the sn_id, the en_id, and the edge name.
def build_hashed_value(row):
    """Build a direction-independent integer id for one edge.

    The id is derived from the two node ids and the edge name. The node ids
    are sorted first, so an edge stored as (a, b) gets the same id as the
    same edge stored as (b, a).

    The id comes from a SHA-256 digest instead of the builtin hash(). The
    builtin salts string hashes per interpreter session, so ids saved in the
    'hashed_id' column would differ every time the notebook is re-run.

    Parameters
    ----------
    row : mapping
        One record (for example the Series passed by DataFrame.apply with
        axis=1) providing the keys 'sn_id', 'en_id', and 'ord_stname_concat'.

    Returns
    -------
    int
        A value that fits in a signed 64-bit integer.
    """
    # local imports keep this cell runnable on its own
    import hashlib
    import numbers

    def as_text(value):
        # 7 and 7.0 compare equal (and matched under the builtin hash()),
        # so integral numbers are written the same way regardless of dtype
        if isinstance(value, numbers.Real) and float(value).is_integer():
            return str(int(value))
        return str(value)

    # sort the node ids so the direction of the edge does not matter
    first_node, second_node = sorted([row['sn_id'], row['en_id']])
    key_parts = (first_node, second_node, row['ord_stname_concat'])

    # join on the unit separator, a control character that should not show up
    # in a street name, so the three parts cannot run together ambiguously
    key = '\x1f'.join(as_text(part) for part in key_parts)
    digest = hashlib.sha256(key.encode('utf-8')).digest()

    # the first 8 bytes give an integer that fits in a 64-bit integer column
    return int.from_bytes(digest[:8], byteorder='big', signed=True)

In [None]:
# one hashed id per v1 edge, computed row by row
v1_hashed_ids = gdf_v1.apply(build_hashed_value, axis=1)
gdf_v1['hashed_id'] = v1_hashed_ids

In [None]:
# one hashed id per v2 edge, computed row by row
v2_hashed_ids = gdf_v2.apply(build_hashed_value, axis=1)
gdf_v2['hashed_id'] = v2_hashed_ids

In [None]:
# keep the v1 edges whose hashed id does not show up in v2
# (~ is the operator for negating a boolean mask; unary minus is not)
v1_id_in_v2 = gdf_v1['hashed_id'].isin(gdf_v2['hashed_id'])
gdf_v1_diff = gdf_v1.loc[~v1_id_in_v2, :].copy()

In [None]:
# (rows, columns) of the v1 edges that have no match in v2
gdf_v1_diff.shape

In [None]:
# keep the v2 edges whose hashed id does not show up in v1
# (~ is the operator for negating a boolean mask; unary minus is not)
v2_id_in_v1 = gdf_v2['hashed_id'].isin(gdf_v1['hashed_id'])
gdf_v2_diff = gdf_v2.loc[~v2_id_in_v1, :].copy()

In [None]:
# (rows, columns) of the v2 edges that have no match in v1
gdf_v2_diff.shape

In [None]:
# tag each set of unmatched edges with the version it came from: 1, then 2
for version_number, diff_gdf in enumerate((gdf_v1_diff, gdf_v2_diff), start=1):
    diff_gdf['version'] = version_number

In [None]:
# stack the unmatched v1 edges on top of the unmatched v2 edges
diff_frames = [gdf_v1_diff, gdf_v2_diff]
v_gdf = pd.concat(objs=diff_frames, axis='index')

In [None]:
# hand the stacked differences to write_gdf, using the shared output folder
# and the file name v1_v2_edge_diff.gpkg
write_gdf(
    gdf=v_gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name='v1_v2_edge_diff.gpkg',
)

In [None]:
# now, let's look at the differences: how many unmatched edges per version?
v_gdf.loc[:, 'version'].value_counts()

In [None]:
# total dist_miles of the unmatched edges, one row per version
edges_by_version = v_gdf[['version', 'dist_miles']].groupby(['version'])
edges_by_version.agg(dist_miles=('dist_miles', 'sum'))

In [None]:
# using version 2, the total length of the added edges is shorter by half a mile. Makes sense...
# get rid of the stubs, fewer connections. Awesome. Forward.