# Part 03: Find the discontinuities in Seattle's streets
michael babb  
2024 11 24

In [1]:
# standard
import os

In [2]:
# external
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point

In [3]:
# custom
import run_constants as rc
from utils import *
from geodataio import df_operations as dfo

# load the working seattle street network data

In [4]:
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S03_SND_WORKING_IN_FILE_NAME)

In [5]:
gdf = gpd.read_file(filename = fpn)

In [6]:
gdf.shape

(25773, 39)

## load the manually created street group data
These data were created in Step 6, but we'll bring them into the Step 3 workflow.

In [7]:
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S03_STREET_GROUP_IN_FILE_NAME)

In [8]:
index_cols = ['sort_order', 'ord_street_name', 'ord_street_type', 'city_portion']
sg_df = pd.read_excel(io = fpn, index_col=[0, 1, 2, 3])
sg_df = sg_df.reset_index()

In [9]:
drop_cols = ['sort_order', 'progress', 'count']
sg_df = sg_df.drop(labels = drop_cols, axis = 1)

In [10]:
gdf = pd.merge(left = gdf, right = sg_df, how = 'left', indicator=True)
gdf['group_id'] = gdf['group_id'].fillna(1).astype(int)

In [11]:
gdf.shape

(25773, 41)

In [12]:
gdf['_merge'].value_counts()

_merge
both          19032
left_only      6741
right_only        0
Name: count, dtype: int64

In [13]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion',
       'ord_stname_unique', 'geometry', 'group_id', '_merge'],
      dtype='object')

In [14]:
# TODO: THIS IS WHERE i LEFT OFF: 2025 01 25
hey_what_is_na(gdf)

unitid
False    25716
True        57
Name: count, dtype: int64
unitid2
False    25716
True        57
Name: count, dtype: int64


In [15]:
gdf = gdf.drop(labels = ["_merge"], axis = 1)

# create an id across street groups

In [16]:
col_names = ['ord_street_name', 'ord_street_type',  'ord_stname_concat', 'ord_stname_unique', 'city_portion', 'group_id']
id_df = gdf[col_names].drop_duplicates()

In [17]:
id_df['ord_stname_type'] = id_df['ord_street_name'] + ' ' + id_df['ord_street_type']

In [18]:
id_df['sort_order'] = id_df['ord_street_name'].map(get_sort_order)

In [19]:
id_df = id_df.sort_values(by = ['sort_order', 'ord_street_type', 'city_portion'])

In [20]:
id_df.head()

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,ord_stname_unique,city_portion,group_id,ord_stname_type,sort_order
14430,1ST,AVE,1ST AVE,1ST AVE CNTR,CNTR,2,1ST AVE,1
9762,1ST,AVE,1ST AVE N,1ST AVE N,N,2,1ST AVE,1
99,1ST,AVE,1ST AVE NE,1ST AVE NE,NE,2,1ST AVE,1
32,1ST,AVE,1ST AVE NW,1ST AVE NW,NW,1,1ST AVE,1
14943,1ST,AVE,1ST AVE S,1ST AVE S,S,2,1ST AVE,1


In [21]:
id_df.shape

(2457, 8)

In [22]:
# let's get the groups! can we aggregate this?

In [23]:
col_names = ['sort_order', 'ord_street_name', 'ord_street_type', 'ord_stname_type', 'group_id', 'city_portion']
agg_id_df = id_df[col_names].groupby(col_names[:-1]).agg(city_portion_group = ('city_portion', lambda x: '_'.join(sorted(set(x)))),
                                                        n_groups = ('city_portion', lambda x: len(set(x)))).reset_index()

In [24]:
agg_id_df.head()

Unnamed: 0,sort_order,ord_street_name,ord_street_type,ord_stname_type,group_id,city_portion_group,n_groups
0,1,1ST,AVE,1ST AVE,1,NW_SW_W,3
1,1,1ST,AVE,1ST AVE,2,CNTR_N_NE_S,4
2,1,1ST,PL,1ST PL,1,NE,1
3,2,2ND,AVE,2ND AVE,1,NW_SW_W,3
4,2,2ND,AVE,2ND AVE,2,CNTR_N_NE_S,4


In [25]:
agg_id_df['city_portion_group'].unique().shape

(32,)

In [26]:
agg_id_df.loc[agg_id_df['n_groups'] > 1, ].shape

(390, 7)

In [27]:
agg_id_df.head()

Unnamed: 0,sort_order,ord_street_name,ord_street_type,ord_stname_type,group_id,city_portion_group,n_groups
0,1,1ST,AVE,1ST AVE,1,NW_SW_W,3
1,1,1ST,AVE,1ST AVE,2,CNTR_N_NE_S,4
2,1,1ST,PL,1ST PL,1,NE,1
3,2,2ND,AVE,2ND AVE,1,NW_SW_W,3
4,2,2ND,AVE,2ND AVE,2,CNTR_N_NE_S,4


In [28]:
# import the revised city_portion_group

In [29]:
fpn = os.path.join(rc.INPUT_FILE_PATH, 'city_portion_check_working.xlsx')

In [30]:
cpg_df = pd.read_excel(io = fpn)

In [31]:
# build a row selector. Less code!
row_select = cpg_df['city_portion_group_revised'].isna()

In [32]:
cpg_df.loc[row_select, 'city_portion_group_revised'] = cpg_df.loc[row_select, 'city_portion_group']

In [33]:
cpg_df.head()

Unnamed: 0,city_portion_group,ord_street_type,n_groups,city_portion_group_revised
0,CNTR,AVE,1,CNTR
1,CNTR,BR,1,CNTR
2,CNTR,CT,1,CNTR
3,CNTR,DR,1,CNTR
4,CNTR,PL,1,CNTR


In [34]:
agg_id_df.head()

Unnamed: 0,sort_order,ord_street_name,ord_street_type,ord_stname_type,group_id,city_portion_group,n_groups
0,1,1ST,AVE,1ST AVE,1,NW_SW_W,3
1,1,1ST,AVE,1ST AVE,2,CNTR_N_NE_S,4
2,1,1ST,PL,1ST PL,1,NE,1
3,2,2ND,AVE,2ND AVE,1,NW_SW_W,3
4,2,2ND,AVE,2ND AVE,2,CNTR_N_NE_S,4


In [35]:
test_join = pd.merge(left = agg_id_df, right = cpg_df, how = 'outer', indicator=True)

In [36]:
test_join['_merge'].value_counts()

_merge
both          1814
left_only        1
right_only       0
Name: count, dtype: int64

In [37]:
test_join.head()

Unnamed: 0,sort_order,ord_street_name,ord_street_type,ord_stname_type,group_id,city_portion_group,n_groups,city_portion_group_revised,_merge
0,BOREALIS,BOREALIS,AVE,BOREALIS AVE,1,CNTR,1,CNTR,both
1,ERIE,ERIE,AVE,ERIE AVE,1,CNTR,1,CNTR,both
2,EUCLID,EUCLID,AVE,EUCLID AVE,1,CNTR,1,CNTR,both
3,FULLERTON,FULLERTON,AVE,FULLERTON AVE,1,CNTR,1,CNTR,both
4,GRAND,GRAND,AVE,GRAND AVE,1,CNTR,1,CNTR,both


In [38]:
agg_id_df = test_join.drop(labels = ['city_portion_group', '_merge'], axis = 1)

In [39]:
hey_what_is_na(df = agg_id_df)    

city_portion_group_revised
False    1814
True        1
Name: count, dtype: int64


In [40]:
test_join = pd.merge(left = gdf, right = agg_id_df, how = 'outer', indicator=True)

In [41]:
test_join['_merge'].value_counts()

_merge
both          25773
left_only         0
right_only        0
Name: count, dtype: int64

In [42]:
test_join['city_portion_group'] = test_join['city_portion_group_revised']

In [43]:
gdf = test_join.drop(labels = ['_merge', 'city_portion_group_revised', 'n_groups'], axis = 1)

In [44]:
gdf.head()

Unnamed: 0,f_intr_id,t_intr_id,snd_id,snd_feacode,citycode,stname_id,st_code,arterial_code,segment_type,agency_code,...,comptype,unitid,unitid2,city_portion,ord_stname_unique,geometry,group_id,sort_order,ord_stname_type,city_portion_group
0,3836,3893,4787,5,1,1955,0,1,1,1,...,68,14050,40,NW,NW 100TH PL,"LINESTRING (-122.36206 47.7035, -122.36342 47....",1,100,100TH PL,NW
1,3893,3906,4802,5,1,1955,0,1,1,1,...,68,14050,43,NW,NW 100TH PL,"LINESTRING (-122.36342 47.70275, -122.36386 47...",1,100,100TH PL,NW
2,3906,3973,4864,5,1,1955,0,1,1,1,...,68,14050,70,NW,NW 100TH PL,"LINESTRING (-122.36386 47.70251, -122.36535 47...",1,100,100TH PL,NW
3,48444,48443,49871,1,0,3767,0,0,1,1,...,0,0,0,SW,SW 100TH PL,"LINESTRING (-122.3403 47.51334, -122.34036 47....",2,100,100TH PL,SW
4,3943,3939,4836,1,1,1956,0,0,1,1,...,68,14055,240,NW,NW 100TH ST,"LINESTRING (-122.38757 47.70155, -122.38762 47...",1,100,100TH ST,NW_N_NE


In [45]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion',
       'ord_stname_unique', 'geometry', 'group_id', 'sort_order',
       'ord_stname_type', 'city_portion_group'],
      dtype='object')

In [46]:
gdf['ord_stname_type_group'] = gdf['ord_stname_type'] + ' ' + gdf['city_portion_group']

In [47]:
# export to disk
write_gdf(gdf = gdf, output_file_path=rc.OUTPUT_FILE_PATH, output_file_name=rc.S03_CLEANED_STREET_GROUP_OUT_FILE_NAME)