## Part 02: Export nodes and select working streets
michael babb  
2025 10 03

In [1]:
# standard
import os

In [2]:
# external
from concave_hull import concave_hull
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from shapely import Point, MultiPoint

In [3]:
# custom
import run_constants as rc
from utils import create_name, generate_street_end_vertices
from geo_data_io.fc_df_spatial import keep_largest_geometry, write_gdf

# load the street network

In [4]:
# these are the streets as downloaded from the City of Seattle
# NOTE(review): the file is read from rc.OUTPUT_FILE_PATH rather than
# rc.INPUT_FILE_PATH - presumably an earlier step wrote it there; confirm.
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_SND_IN_FILE_NAME)

In [5]:
# read the street network into a GeoDataFrame
gdf = gpd.read_file(filename = fpn)

In [6]:
# rows x columns of the street network as loaded
gdf.shape

(34378, 39)

# generate street end vertices - these will be helpful with identification in subsequent steps

In [7]:
# the helper returns two objects: the street network (rebound to gdf) and a
# second GeoDataFrame, node_gdf, that the next cell writes to disk.
# NOTE(review): presumably node_gdf holds one node per street-segment end
# point - confirm against utils.generate_street_end_vertices.
gdf, node_gdf = generate_street_end_vertices(gdf = gdf)

In [8]:
# export the street-end nodes
write_gdf(
    gdf = node_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_NODE_OUT_FILE_NAME,
)

# import the manually classified streets to update streets without a classification. 

In [9]:
# path to the workbook holding the manually classified street types
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_BST_IN_FILE_NAME)

In [10]:
# load the manual fixes: a street name (ord_stname_concat) and the
# street type assigned to it by hand (ord_street_type_fix)
blank_street_type_df = pd.read_excel(io = fpn)

In [11]:
# preview the first rows of the manual fixes
blank_street_type_df.head()

Unnamed: 0,ord_stname_concat,ord_street_type_fix
0,17TH AVE NW WKWY,WKWY
1,21ST AV SW TURN,RN
2,59TH AV SW TURN,ST
3,AIRPORT FY NB,SR
4,AIRPORT FY SB,SR


In [12]:
# number of manual fixes (rows) and columns
blank_street_type_df.shape

(80, 2)

In [13]:
# list the street network columns - 'ord_stname_concat' is the only one
# shared with blank_street_type_df, so it is the key for the join below
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_sector',
       'ord_stname_unique', 'geometry'],
      dtype='object')

In [14]:
# attach the manual street-type fixes to the street network.
# the join key is spelled out instead of being inferred from the shared column
# names, so an extra shared column can never silently change the join.
# validate = 'many_to_one' raises a MergeError if a street name is listed more
# than once in the fix table, which would otherwise duplicate street segments.
# how = 'outer' with indicator = True adds the '_merge' column that is
# tallied in the next cell to confirm that every fix matched a street.
test_join = pd.merge(left = gdf, right = blank_street_type_df,
                     on = 'ord_stname_concat', how = 'outer',
                     validate = 'many_to_one', indicator = True)

In [15]:
# tally the join: 'both' rows received a manual fix, 'left_only' rows did not,
# and 'right_only' should be 0 (every fix matched at least one street)
test_join['_merge'].value_counts()

_merge
left_only     33662
both            716
right_only        0
Name: count, dtype: int64

In [16]:
# carry on with the joined table as the working street network
gdf = test_join.copy()

In [17]:
# rows with a missing ord_street_type take the value of ord_street_type_fix;
# rows that already have a street type are left untouched
gdf['ord_street_type'] = gdf['ord_street_type'].fillna(gdf['ord_street_type_fix'])

In [18]:
# list the street types present after applying the manual fixes
gdf['ord_street_type'].unique()

array(['RP', 'AVE', 'LN', 'PL', 'TER', 'RD', 'CT', 'BR', 'WKWY', 'TRL',
       'RN', 'ST', 'ET', 'DR', 'CIR', 'PKWY', 'SR', 'WAY', 'AL', 'RR',
       'BLVD', 'OP', 'STCR', 'IS', 'MALL', 'PZ', 'LOOP', 'FLYOVER',
       'VIEW', 'HWY'], dtype=object)

In [19]:
# confirm that no street is left without a type (expect only False)
gdf['ord_street_type'].isna().value_counts()

ord_street_type
False    34378
Name: count, dtype: int64

In [20]:
# the two helper columns added by the join are no longer needed
gdf = gdf.drop(columns = ['ord_street_type_fix', '_merge'])

In [21]:
# same shape as before the join: no rows gained or lost, helper columns removed
gdf.shape

(34378, 39)

In [22]:
# save the complete - but revised - street network
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_COMPLETE_OUT_FILE_NAME,
)

# keep only streets in Seattle
These are street segments with `SEATTLE` recorded as the city on at least one side (`l_city` or `r_city`).

In [23]:
# a segment is kept when either of its city columns reads SEATTLE
in_seattle = gdf[['l_city', 'r_city']].eq('SEATTLE').any(axis = 1)
gdf = gdf.loc[in_seattle, :].copy()

In [24]:
# rows x columns after keeping only the Seattle segments
gdf.shape

(27891, 39)

In [25]:
# number of distinct street names (ord_stname_concat) among the Seattle segments
gdf['ord_stname_concat'].unique().shape

(2795,)

# write the full seattle streets to disk

In [26]:
# export every Seattle segment, before any segment-type filtering
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_FULL_OUT_FILE_NAME,
)

# keep only streets with segment_type == 1  
These are actual streets.  
Every other `segment_type` is not a street (rail, trail, staircase, etc.).  
`segment_type == 15` is a stub, a dangling piece of a street, usually around 50 feet or less.  
Very short! There are 362 of them.

In [27]:
# how many segments are there of each segment_type?
gdf['segment_type'].value_counts()

segment_type
1     25789
15      362
7       350
6       332
4       215
3       205
8       203
9       175
13      126
2        49
5        45
11       33
10        5
12        1
14        1
Name: count, dtype: int64

In [28]:
# before... (rows x columns prior to the segment_type filter)
gdf.shape

(27891, 39)

In [29]:
# keep only the rows whose segment_type is 1
is_street = gdf['segment_type'].eq(1)
gdf = gdf.loc[is_street, :].copy()
# after...
gdf.shape

(25789, 39)

In [30]:
# number of distinct ord_stname_unique values among the remaining streets
gdf['ord_stname_unique'].unique().shape

(2462,)

# update the city sector variable

In [31]:
# the central streets were identified with some analysis in QGIS;
# rc.S02_CENTRAL_STREETS_IN_FILE_NAME names the file holding that result
fpn = os.path.join(
    rc.INPUT_FILE_PATH,
    rc.S02_CENTRAL_STREETS_IN_FILE_NAME,
)
print(fpn)
cs_df = pd.read_csv(fpn)

../data\city_sector.csv


In [32]:
# check: each row pairs a segment id (snd_id) with a sector label (temp_city_sector)
cs_df.head()

Unnamed: 0,snd_id,temp_city_sector
0,11524,CNTR
1,16184,CNTR
2,16185,CNTR
3,16187,CNTR
4,16289,CNTR


In [33]:
# join the central-street labels onto the streets by segment id.
# the key is named explicitly rather than inferred from shared column names,
# and validate = 'many_to_one' raises a MergeError if an snd_id is listed more
# than once in cs_df, which would otherwise duplicate street segments.
# how = 'outer' with indicator = True adds the '_merge' column: 'both' marks
# the streets that received a label, and 'right_only' should be 0.
test_join = pd.merge(left = gdf, right = cs_df, on = 'snd_id', how = 'outer',
                     validate = 'many_to_one', indicator = True)
test_join['_merge'].value_counts()

_merge
left_only     24082
both           1707
right_only        0
Name: count, dtype: int64

In [34]:
# the count of distinct ord_stname_unique values should be unchanged by the join
test_join['ord_stname_unique'].unique().shape

(2462,)

In [35]:
# confirm the two columns added by the join: temp_city_sector and _merge
test_join.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_sector',
       'ord_stname_unique', 'geometry', 'temp_city_sector', '_merge'],
      dtype='object')

In [36]:
# for the rows matched by the join, replace city_sector with the
# temporary sector label; all other rows keep their current city_sector
is_matched = test_join['_merge'] == 'both'
test_join.loc[is_matched, 'city_sector'] = test_join.loc[is_matched, 'temp_city_sector']

In [37]:
# update the unique name: create_name is called once per row (axis = 1)
# and its return value becomes that row's ord_stname_unique.
# NOTE(review): presumably the name incorporates city_sector, which is why it
# is rebuilt after the sector update - confirm against utils.create_name.
test_join['ord_stname_unique'] = test_join.apply(func = create_name, axis = 1)

In [38]:
# discard the join helper columns and make the result the working gdf
drop_col_names = ['temp_city_sector', '_merge']
gdf = test_join.drop(columns = drop_col_names).copy()

# draw a convex hull and a concave hull around each group of city streets

In [39]:
# for every city sector, gather the vertices of all of its street segments and
# wrap them in two polygons: a convex hull and a concave hull.
# data_list collects the [city_sector, hull_type] attributes and geom_list
# collects the matching geometry at the same position.
data_list = []
geom_list = []
# dropna(): a missing sector label would match no streets and yield an empty hull
for cp in gdf['city_sector'].dropna().unique().tolist():
    print(cp)
    temp_geoms = gdf.loc[gdf['city_sector'] == cp, 'geometry']

    # every vertex of the sector's streets as a single array of x-y pairs.
    # shapely.get_coordinates() extracts them in one vectorised call and also
    # copes with multi-part, empty, and missing geometries, which a Python
    # loop over each line's `.coords` does not.
    point_list = shapely.get_coordinates(temp_geoms.to_numpy())

    # create the convex hull - MultiPoint accepts the coordinate array
    # directly, so there is no need to build a Point object per vertex
    cnvx_hull = shapely.convex_hull(geometry = MultiPoint(point_list))
    data_list.append([cp, 'convex'])
    geom_list.append(cnvx_hull)

    # create a concave hull from the same vertices
    cncv_hull = concave_hull(points = point_list)
    # create a shapely polygon from the hull's vertices
    cncv_poly = shapely.Polygon(cncv_hull)
    data_list.append([cp, 'concave'])
    geom_list.append(cncv_poly)

NW
N
NE
CNTR
E
W
S
SW


In [40]:
# build a geodataframe with one row per (city_sector, hull_type) pair
output_gdf = gpd.GeoDataFrame(
    data = data_list,
    columns = ['city_sector', 'hull_type'],
    geometry = geom_list,
    crs = 'epsg:4326',
)
# fix any odd geometries
output_gdf['geometry'] = output_gdf['geometry'].make_valid()

In [41]:
# now, let's project to WGS 84 UTM Zone 10N to perform some area calculations

In [42]:
# EPSG:32610 is WGS 84 / UTM zone 10N
output_gdf = output_gdf.to_crs(epsg=32610)

In [43]:
# preview - note that make_valid() left some concave hulls as MULTIPOLYGONs
output_gdf.head()

Unnamed: 0,city_sector,hull_type,geometry
0,NW,convex,"POLYGON ((548235.329 5277837.611, 548223.346 5..."
1,NW,concave,"MULTIPOLYGON (((548260.471 5278459.705, 548236..."
2,N,convex,"POLYGON ((550121.707 5274117.103, 550025.382 5..."
3,N,concave,"MULTIPOLYGON (((548236.706 5277549.869, 548236..."
4,NE,convex,"POLYGON ((552225.631 5277355.758, 551813.827 5..."


In [44]:
# reduce each (city_sector, hull_type) group to a single geometry.
# NOTE(review): presumably this keeps the largest-area part of each
# MULTIPOLYGON, which is why the data was projected to UTM first - confirm
# against geo_data_io.fc_df_spatial.keep_largest_geometry.
output_gdf = keep_largest_geometry(gdf = output_gdf, group_col_names=['city_sector', 'hull_type'])

In [45]:
# preview again - the concave hulls should now be single POLYGONs
output_gdf.head()

Unnamed: 0,city_sector,hull_type,geometry
0,NW,convex,"POLYGON ((548235.329 5277837.611, 548223.346 5..."
1,NW,concave,"POLYGON ((544560.158 5280213.434, 544559.88 52..."
2,N,convex,"POLYGON ((550121.707 5274117.103, 550025.382 5..."
3,N,concave,"POLYGON ((548236.706 5277549.869, 548236.48 52..."
4,NE,convex,"POLYGON ((552225.631 5277355.758, 551813.827 5..."


In [46]:
# re-project back to wgs-84 (EPSG:4326), the CRS the hulls were created in
output_gdf = output_gdf.to_crs(epsg = 4326)

In [47]:
# save it! (the sector hulls as polygons)
write_gdf(
    gdf = output_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_CITY_SECTORS_OUT_FILE_NAME,
)

In [48]:
# let's export this to a LineString file as well:
# replace each hull with its boundary and fix any incorrect geometry in one pass
output_gdf['geometry'] = output_gdf['geometry'].boundary.make_valid()

In [49]:
# save the sector hull boundaries as lines
write_gdf(
    gdf = output_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_CITY_SECTORS_LINES_OUT_FILE_NAME,
)

# remove the following street types:

AL: alley  
TRL: trail  
OP: overpass  
IS: interstate  
SR: state route  
RR: rail  
FLYOVER: flyover  
STCR: streetcar  
ET: extension  
RN: turn  
RP: highway ramps  
WKWY: walkways  

In [50]:
# street types to exclude from the working street network
street_type_to_remove = [
    'AL',       # alley
    'TRL',      # trail
    'OP',       # overpass
    'IS',       # interstate
    'SR',       # state route
    'RR',       # rail
    'FLYOVER',  # flyover
    'STCR',     # streetcar
    'ET',       # extension
    'RN',       # turn
    'RP',       # highway ramps
    'WKWY',     # walkways
]

In [51]:
# before... (rows x columns prior to removing the unwanted street types)
gdf.shape

(25789, 39)

In [52]:
# what street types are currently in the gdf?
# (only some of the types in street_type_to_remove are still present)
gdf['ord_street_type'].unique()

array(['ST', 'AVE', 'RD', 'WAY', 'PL', 'LN', 'DR', 'CT', 'CIR', 'PZ',
       'BLVD', 'LOOP', 'BR', 'PKWY', 'TER', 'ET', 'VIEW', 'OP', 'FLYOVER',
       'RN', 'WKWY', 'TRL', 'SR'], dtype=object)

In [53]:
# remove... keep only the rows whose street type is NOT in street_type_to_remove.
# `~` is the boolean NOT for a pandas mask; unary minus on a boolean Series
# merely happens to give the same result and is an error on a numpy boolean array.
gdf = gdf.loc[~gdf['ord_street_type'].isin(street_type_to_remove), :].copy()

In [54]:
# after.. (rows x columns once the unwanted street types are gone)
gdf.shape

(25774, 39)

In [55]:
# what street types are left?
# (none of the types in street_type_to_remove should appear)
gdf['ord_street_type'].unique()

array(['ST', 'AVE', 'RD', 'WAY', 'PL', 'LN', 'DR', 'CT', 'CIR', 'PZ',
       'BLVD', 'LOOP', 'BR', 'PKWY', 'TER', 'VIEW'], dtype=object)

# remove other short segments that appear to be invalid.

In [56]:
# load the list of erroneous segments, identified by snd_id.
# the path is built from rc.INPUT_FILE_PATH, like the other input files in
# this notebook, instead of repeating the data directory as a literal.
fpn = os.path.join(rc.INPUT_FILE_PATH, 'streets_to_remove.txt')
err_segments = pd.read_csv(filepath_or_buffer=fpn)
err_segments.shape

(3, 2)

In [57]:
# examine the erroneous segments: pull the rows whose snd_id is listed in
# err_segments and summarise their lengths
tgdf = gdf.loc[gdf['snd_id'].isin(err_segments['snd_id']), :].copy()
tgdf['gis_seg_length'].describe()

# very short! (each one is under 24 units of gis_seg_length)

count     3.000000
mean     17.774578
std       4.845671
min      14.905917
25%      14.977231
50%      15.048545
75%      19.208908
max      23.369271
Name: gis_seg_length, dtype: float64

In [58]:
# check names... (which streets do the erroneous segments belong to?)
tgdf['ord_stname_concat'].unique().tolist()

['15TH AVE W', 'CECIL AVE S', '18TH AVE S']

In [59]:
# all of them are segment_type 1, so the earlier segment_type filter kept them
tgdf['segment_type'].unique()

array([1])

In [60]:
# look at the full records of the erroneous segments
tgdf.head()

Unnamed: 0,f_intr_id,t_intr_id,snd_id,snd_feacode,citycode,stname_id,st_code,arterial_code,segment_type,agency_code,...,r_state,r_zip,sndseg_update,compkey,comptype,unitid,unitid2,city_sector,ord_stname_unique,geometry
25142,46825,8049,47787,1,1,65,40,0,1,3,...,WA,98119,2009-05-22 00:00:00+00:00,0,0,,,W,15TH AVE W,"LINESTRING (-122.37617 47.6595, -122.37624 47...."
25202,47101,18236,48119,0,1,717,40,0,1,3,...,WA,98118,2010-07-15 00:00:00+00:00,0,0,,,S,CECIL AVE S,"LINESTRING (-122.28771 47.52329, -122.28766 47..."
25334,47413,13517,48499,0,1,93,40,0,1,3,...,WA,98144,2013-03-25 00:00:00+00:00,0,0,0.0,0.0,S,18TH AVE S,"LINESTRING (-122.31011 47.58373, -122.3101 47...."


In [61]:
# so short! (the individual gis_seg_length value of each erroneous segment)
tgdf['gis_seg_length'].head()

25142    23.369271
25202    14.905917
25334    15.048545
Name: gis_seg_length, dtype: float64

In [62]:
# remove the erroneous segments: keep the rows whose snd_id is NOT listed.
# `~` is the boolean NOT for a pandas mask; unary minus on a boolean Series
# merely happens to give the same result and is an error on a numpy boolean array.
gdf = gdf.loc[~gdf['snd_id'].isin(err_segments['snd_id']), :].copy()

In [63]:
# after (rows x columns once the erroneous segments are removed)
gdf.shape

(25771, 39)

In [64]:
# unique roads: number of distinct ord_stname_unique values in the working network
gdf['ord_stname_unique'].unique().shape

(2457,)

In [65]:
# total miles in Seattle, based on select road ways
# assumes gis_seg_length is measured in feet (5280 feet per mile) - TODO confirm
gdf['gis_seg_length'].sum() / 5280

np.float64(1892.4954813380684)

# save as a gdf - overwrite

In [66]:
# export the working street network
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_WORKING_OUT_FILE_NAME,
)