# Part 07: Identify streets that cross sectors, plot individual streets
michael babb  
2025 10 03 

In [1]:
# standard
import os
import re
import collections

In [2]:
# external
import geopandas as gpd
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# custom
import run_constants as rc
from utils import *

In [4]:
# process flow variables

# load the street network data

In [5]:
rc.S03_SND_WORKING_IN_FILE_NAME

'Street_Network_Database_Seattle_working.gpkg'

In [6]:
# Path to the working street network file (the list of streets).
# This is the data before any work is done to compute the missing segments.
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S03_SND_WORKING_IN_FILE_NAME)

In [7]:
# read the working street network file from disk with geopandas
gdf = gpd.read_file(filename = fpn)

In [8]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_sector',
       'ord_stname_unique', 'geometry'],
      dtype='object')

In [9]:
# constant helper column: every row contributes a count of one
gdf['n_roads'] = 1

In [10]:

gdf['ord_stname_concat'].unique().shape

(2455,)

In [11]:
gdf['ord_stname_unique'].unique().shape

(2457,)

# group data by street type for use in version 2.0

In [12]:
# one row per distinct combination of street name, street type,
# the two full-name variants, and the city sector
col_names = [
    'ord_street_name',
    'ord_street_type',
    'ord_stname_concat',
    'ord_stname_unique',
    'city_sector',
]
st_name_type_df = gdf[col_names].drop_duplicates()

In [13]:
# tally how many rows of st_name_type_df carry each ord_stname_unique value
my_counter = collections.Counter(st_name_type_df['ord_stname_unique'])


In [14]:
my_counter.most_common()

[('NW 145TH ST', 1),
 ('N 145TH ST', 1),
 ('NE 145TH ST', 1),
 ('AURORA AVE N', 1),
 ('SHERWOOD RD NW', 1),
 ('3RD AVE NW', 1),
 ('1ST AVE NW', 1),
 ('NW 144TH ST', 1),
 ('GREENWOOD AVE N', 1),
 ('N 144TH ST', 1),
 ('FREMONT AVE N', 1),
 ('MIDVALE AVE N', 1),
 ('ROOSEVELT WAY N', 1),
 ('LENORA PL N', 1),
 ('MERRILL LN NW', 1),
 ('NW 143RD ST', 1),
 ('STONE AVE N', 1),
 ('N 143RD ST', 1),
 ('NW CULBERTSON DR', 1),
 ('HILL TOP LN NW', 1),
 ('PHINNEY AVE N', 1),
 ('DAYTON AVE N', 1),
 ('EVANSTON AVE N', 1),
 ('LINDEN AVE N', 1),
 ('INTERLAKE AVE N', 1),
 ('ROSLYN PL N', 1),
 ('ASHWORTH AVE N', 1),
 ('DENSMORE AVE N', 1),
 ('COURTLAND PL N', 1),
 ('WALLINGFORD AVE N', 1),
 ('BURKE AVE N', 1),
 ('WAYNE PL N', 1),
 ('MERIDIAN AVE N', 1),
 ('BAGLEY AVE N', 1),
 ('CORLISS AVE N', 1),
 ('SUNNYSIDE AVE N', 1),
 ('1ST AVE NE', 1),
 ('4TH AVE NE', 1),
 ('N 142ND ST', 1),
 ('ALPINE WAY NW', 1),
 ('NW NORTHWOOD RD', 1),
 ('N 141ST ST', 1),
 ('N 141ST CT', 1),
 ('2ND AVE NW', 1),
 ('NW 140TH ST', 1),

In [15]:
# the number of unique streets
st_name_type_df.shape

(2457, 5)

In [16]:
# order by street name, then street type, then the full concatenated name
sort_cols = ['ord_street_name', 'ord_street_type', 'ord_stname_concat']
st_name_type_df = st_name_type_df.sort_values(by=sort_cols)

In [17]:
st_name_type_df.head(n=10)

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,ord_stname_unique,city_sector
1013,100TH,PL,NW 100TH PL,NW 100TH PL,NW
25614,100TH,PL,SW 100TH PL,SW 100TH PL,SW
1101,100TH,ST,N 100TH ST,N 100TH ST,N
2842,100TH,ST,NE 100TH ST,NE 100TH ST,NE
1055,100TH,ST,NW 100TH ST,NW 100TH ST,NW
22712,100TH,ST,S 100TH ST,S 100TH ST,S
21026,100TH,ST,SW 100TH ST,SW 100TH ST,SW
25612,101ST,PL,SW 101ST PL,SW 101ST PL,SW
1040,101ST,ST,N 101ST ST,N 101ST ST,N
1030,101ST,ST,NW 101ST ST,NW 101ST ST,NW


In [18]:
# make some ids - ord_street_name
# distinct street names, alphabetically sorted, numbered from zero
osn_df = (
    st_name_type_df['ord_street_name']
    .drop_duplicates()
    .to_frame()
    .sort_values(by='ord_street_name')
    .reset_index(drop=True)
)
osn_df['osn_id'] = range(len(osn_df))
osn_df.head()

Unnamed: 0,ord_street_name,osn_id
0,100TH,0
1,101ST,1
2,102ND,2
3,103RD,3
4,104TH,4


In [19]:
# ord_street_name and ord_street_type
# distinct (name, type) pairs, sorted, with a combined label and a zero-based id
col_names = ['ord_street_name', 'ord_street_type']
osn_ost_df = (
    st_name_type_df[col_names]
    .drop_duplicates()
    .sort_values(by=col_names)
    .reset_index(drop=True)
)
street_name = osn_ost_df['ord_street_name']
street_type = osn_ost_df['ord_street_type']
osn_ost_df['osn_ost'] = street_name + ' ' + street_type
osn_ost_df['osn_ost_id'] = range(len(osn_ost_df))
osn_ost_df.head()


Unnamed: 0,ord_street_name,ord_street_type,osn_ost,osn_ost_id
0,100TH,PL,100TH PL,0
1,100TH,ST,100TH ST,1
2,101ST,PL,101ST PL,2
3,101ST,ST,101ST ST,3
4,102ND,LN,102ND LN,4


In [20]:
# ord_stname_concat
# distinct concatenated street names, sorted, numbered from zero
ostc_df = (
    st_name_type_df['ord_stname_concat']
    .drop_duplicates()
    .to_frame()
    .sort_values(by='ord_stname_concat')
    .reset_index(drop=True)
)
ostc_df['ostc_id'] = range(len(ostc_df))
ostc_df.head()


Unnamed: 0,ord_stname_concat,ostc_id
0,10TH AVE,0
1,10TH AVE E,1
2,10TH AVE NE,2
3,10TH AVE NW,3
4,10TH AVE S,4


In [21]:
# ord_stname_unique
# distinct unique street names, sorted, numbered from zero
ostu_df = (
    st_name_type_df['ord_stname_unique']
    .drop_duplicates()
    .to_frame()
    .sort_values(by='ord_stname_unique')
    .reset_index(drop=True)
)
ostu_df['ostu_id'] = range(len(ostu_df))
ostu_df.head()

Unnamed: 0,ord_stname_unique,ostu_id
0,10TH AVE CNTR,0
1,10TH AVE E,1
2,10TH AVE NE,2
3,10TH AVE NW,3
4,10TH AVE S,4


In [22]:
# join to get the id values
# each lookup frame is merged on the column(s) it shares with st_name_type_df
for lookup_df in (osn_df, osn_ost_df, ostc_df, ostu_df):
    st_name_type_df = st_name_type_df.merge(right=lookup_df)

In [23]:
st_name_type_df.shape

(2457, 10)

In [24]:
st_name_type_df.head()

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,ord_stname_unique,city_sector,osn_id,osn_ost,osn_ost_id,ostc_id,ostu_id
0,100TH,PL,NW 100TH PL,NW 100TH PL,NW,0,100TH PL,0,1450,1451
1,100TH,PL,SW 100TH PL,SW 100TH PL,SW,0,100TH PL,0,2041,2042
2,100TH,ST,N 100TH ST,N 100TH ST,N,0,100TH ST,1,1127,1128
3,100TH,ST,NE 100TH ST,NE 100TH ST,NE,0,100TH ST,1,1253,1254
4,100TH,ST,NW 100TH ST,NW 100TH ST,NW,0,100TH ST,1,1451,1452


In [25]:
st_name_type_df['ord_street_type'].unique()

array(['PL', 'ST', 'LN', 'WAY', 'AVE', 'CT', 'RD', 'BR', 'TER', 'CIR',
       'DR', 'PKWY', 'BLVD', 'PZ', 'VIEW', 'LOOP'], dtype=object)

In [26]:
st_name_type_df['city_sector'].unique()

array(['NW', 'SW', 'N', 'NE', 'S', 'CNTR', 'E', 'W'], dtype=object)

In [27]:
# sanity check: look for any rows assigned to a 'WB' sector
is_wb_sector = st_name_type_df['city_sector'] == 'WB'
check = st_name_type_df.loc[is_wb_sector, :]

In [28]:
check.head()

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,ord_stname_unique,city_sector,osn_id,osn_ost,osn_ost_id,ostc_id,ostu_id


In [29]:
# let's connect the ord_street_name and the ord_street_type
st_name_type_df['osn_ost'].unique().shape

(1631,)

In [30]:
# LET'S COUNT HOW MANY STREETS WE NEED TO PLOT AND MANUALLY REVIEW

In [31]:
# constant helper column so that summing it counts rows
st_name_type_df['n_roads'] = 1

In [32]:
st_name_type_df.columns

Index(['ord_street_name', 'ord_street_type', 'ord_stname_concat',
       'ord_stname_unique', 'city_sector', 'osn_id', 'osn_ost', 'osn_ost_id',
       'ostc_id', 'ostu_id', 'n_roads'],
      dtype='object')

In [33]:
# count rows per (street name, street type) in each city sector;
# margins=True adds the 'All' totals row and column
pv_df = st_name_type_df.pivot_table(
    values=['n_roads'],
    index=['ord_street_name', 'ord_street_type'],
    columns=['city_sector'],
    aggfunc='sum',
    fill_value=0,
    margins=True,
)

In [34]:
pv_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads
Unnamed: 0_level_1,city_sector,CNTR,E,N,NE,NW,S,SW,W,All
ord_street_name,ord_street_type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
100TH,PL,0,0,0,0,1,0,1,0,2
100TH,ST,0,0,1,1,1,1,1,0,5
101ST,PL,0,0,0,0,0,0,1,0,1
101ST,ST,0,0,1,0,1,1,1,0,4
102ND,LN,0,0,0,0,0,0,1,0,1


In [35]:
# clean up column names
# each column label is a two-part tuple; keep only the second part
pv_df.columns = [sector for (_top, sector) in pv_df.columns]

In [36]:
pv_df.columns

Index(['CNTR', 'E', 'N', 'NE', 'NW', 'S', 'SW', 'W', 'All'], dtype='object')

In [37]:
# move ord_street_name and ord_street_type out of the index into regular columns
pv_df = pv_df.reset_index()

In [38]:
pv_df.tail()

Unnamed: 0,ord_street_name,ord_street_type,CNTR,E,N,NE,NW,S,SW,W,All
1627,YESLER,WAY,1,1,0,0,0,0,0,0,2
1628,YESLER TERRACE ACC,RD,0,0,0,0,0,1,0,0,1
1629,YORK,RD,0,0,0,0,0,1,0,0,1
1630,YUKON,AVE,0,0,0,0,0,1,0,0,1
1631,All,,170,188,236,386,244,621,390,222,2457


In [39]:
pv_df.shape

(1632, 11)

In [40]:
# grouping by ord_street_name and ord_street_type tells us the number of streets
# there are across the city sectors ('All' holds that total)
# we only need to worry about streets that span at least two sectors
needs_review = pv_df['All'] >= 2
osn_count_df = pv_df.loc[needs_review, :].copy()

In [41]:
# drop the All row (the margins total is labelled 'All' in ord_street_name)
is_street_row = osn_count_df['ord_street_name'] != 'All'
osn_count_df = osn_count_df.loc[is_street_row, :]

In [42]:
osn_count_df.head()

Unnamed: 0,ord_street_name,ord_street_type,CNTR,E,N,NE,NW,S,SW,W,All
0,100TH,PL,0,0,0,0,1,0,1,0,2
1,100TH,ST,0,0,1,1,1,1,1,0,5
3,101ST,ST,0,0,1,0,1,1,1,0,4
5,102ND,ST,0,0,1,1,0,1,1,0,4
6,103RD,PL,0,0,0,1,0,0,1,0,2


In [43]:
# melt, to reshape: the sector columns become rows of (city_sector, n_roads)
osnt_df = osn_count_df.melt(
    id_vars=['ord_street_name', 'ord_street_type'],
    var_name='city_sector',
    value_name='n_roads',
)

In [44]:
# drop zero records: keep only street / sector pairs with at least one road
has_roads = osnt_df['n_roads'] > 0
osnt_df = osnt_df.loc[has_roads, :].copy()

In [45]:
osnt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_sector,n_roads
14,10TH,AVE,CNTR,1
30,11TH,AVE,CNTR,1
42,12TH,AVE,CNTR,1
55,13TH,AVE,CNTR,1
61,14TH,AVE,CNTR,1


In [46]:
# the osnt_df is effectively the same shape as the input gdf, but by pivoting we
# can very easily count the number of roads that span multiple sectors by using
# built-in tools. 

In [47]:
# the melted 'All' column is a total, not a city sector; discard those rows
is_sector_row = osnt_df['city_sector'] != 'All'
osnt_df = osnt_df.loc[is_sector_row, :].copy()

In [48]:
# sort it... by street name, then street type, then sector
sort_keys = ['ord_street_name', 'ord_street_type', 'city_sector']
osnt_df = osnt_df.sort_values(by=sort_keys)

In [49]:
# we don't need this column: the zero rows are already gone
osnt_df = osnt_df.drop(columns=['n_roads'])

In [50]:
osnt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_sector
1644,100TH,PL,NW
2466,100TH,PL,SW
823,100TH,ST,N
1234,100TH,ST,NE
1645,100TH,ST,NW


In [51]:
# get the numeric value of the streets, for the sort order...
# NOTE(review): get_sort_order is not defined in this notebook; presumably it
# arrives via `from utils import *`. Its exact rules are not visible here.
osnt_df['sort_order'] = osnt_df['ord_street_name'].map(get_sort_order)

In [52]:
osnt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_sector,sort_order
1644,100TH,PL,NW,100
2466,100TH,PL,SW,100
823,100TH,ST,N,100
1234,100TH,ST,NE,100
1645,100TH,ST,NW,100


In [53]:
# reorder: sort_order leads, then sort the rows on every column in that order
col_names = ['sort_order', 'ord_street_name', 'ord_street_type', 'city_sector']
osnt_df = osnt_df[col_names]
osnt_df = osnt_df.sort_values(by=col_names)

In [54]:
# set an index - this is helpful for when we write this to excel
# easier to read for manual review
# (all four columns become index levels)
osnt_df = osnt_df.set_index(keys=col_names)

In [55]:
osnt_df.head()

sort_order,ord_street_name,ord_street_type,city_sector
1,1ST,AVE,CNTR
1,1ST,AVE,N
1,1ST,AVE,NE
1,1ST,AVE,NW
1,1ST,AVE,S


In [56]:
osnt_df.shape

(1217, 0)

In [57]:
osnt_df.columns

Index([], dtype='object')

In [58]:
# save it to excel
# osnt_df has no regular columns at this point; everything written comes from
# its four-level index (sort_order, street name, street type, city sector)
ofpn = os.path.join(rc.ANALYSIS_OUTPUT_FILE_PATH, rc.S07_STREET_GROUP_OUT_FILE_NAME)
osnt_df.to_excel(excel_writer=ofpn)


# make a simple plot of each street. 

In [59]:
# each plot will feature the city sector boundaries as created by the convex-hull 
# in step 2 and the different road segments for each street.

In [60]:
# use the same colors for each city sector
# cdm maps a sector label to the Dark2 color in the same position
dir_list = ['CNTR', 'E', 'N', 'NE', 'NW', 'S', 'SW', 'W']
dark2_colors = mpl.colormaps["Dark2"].colors
cdm = {dir_value: dark2_colors[idir] for idir, dir_value in enumerate(dir_list)}

# colormap whose entries follow the order of dir_list
my_cmap = mpl.colors.ListedColormap([cdm[c] for c in dir_list])

In [61]:
# plot bounds to use across each plot
# stored as [x_min, y_min, x_max, y_max]
bounds = [
    -122.4197794277490061,
    47.4803548409661005,
    -122.2200188105690017,
    47.7341482423694004,
]

In [62]:
# load the boundaries of the city sectors
# the file name comes from the step-02 constant in run_constants
ifpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_CITY_SECTORS_OUT_FILE_NAME)
cs_gdf = gpd.read_file(filename=ifpn)

In [63]:
# turn the index levels back into regular columns
osnt_df = osnt_df.reset_index()

In [64]:
osnt_df.head()

Unnamed: 0,sort_order,ord_street_name,ord_street_type,city_sector
0,1,1ST,AVE,CNTR
1,1,1ST,AVE,N
2,1,1ST,AVE,NE
3,1,1ST,AVE,NW
4,1,1ST,AVE,S


In [65]:
osn_ost_df.shape

(1631, 4)

In [66]:
# one row per street name / street type pair that needs a plot
name_type_cols = ['ord_street_name', 'ord_street_type']
wosnt_df = osnt_df[name_type_cols].drop_duplicates()

In [67]:
wosnt_df.shape

(411, 2)

In [68]:
# small subset (rows at positions 1 through 9; the first row is skipped)
# NOTE(review): the plotting loop below iterates wosnt_df, not this subset -
# presumably this was used for trial runs; confirm before removing
t_wosnt_df = wosnt_df.iloc[1:10]

In [69]:
t_wosnt_df.head()

Unnamed: 0,ord_street_name,ord_street_type
7,2ND,AVE
14,3RD,AVE
21,3RD,PL
23,4TH,AVE
30,4TH,PL


In [70]:
wosnt_df.shape

(411, 2)

In [71]:
# Set make_plots to True to write one PNG per street name / street type pair
# in wosnt_df. Each plot shows the city sector boundaries with the street's
# segments drawn on top, colored and labelled by city sector.
make_plots = False
output_file_name_list = []
if make_plots:
    name_type_pairs = wosnt_df[['ord_street_name', 'ord_street_type']]
    for osn, ost in name_type_pairs.itertuples(index=False, name=None):
        output_file_name = f"{osn}_{ost}.png"
        ofpn = os.path.join(rc.S06_PLOT_OUTPUT_FILE_PATH, output_file_name)
        print(ofpn)
        output_file_name_list.append(output_file_name)

        # now, let's make a map...
        # every segment that shares this street name and street type
        wgdf = gdf.loc[(gdf['ord_street_name'] == osn) &
                       (gdf['ord_street_type'] == ost), :]
        # merge the segments into one geometry per city sector
        to_draw = wgdf[['city_sector', 'geometry']].dissolve(by = 'city_sector', as_index = False)
        # label position: the centroid of each sector's merged geometry
        to_draw['coords'] = to_draw['geometry'].map(lambda x: x.centroid.coords[0])

        fig = plt.figure(layout = 'constrained', figsize = (5, 10))
        gs = GridSpec(1, 1, figure = fig, height_ratios = [1])
        ax1 = fig.add_subplot(gs[0, 0])
        ax1.set_xlim(bounds[0], bounds[2])
        ax1.set_ylim(bounds[1], bounds[3])
        cs_gdf.plot(ax = ax1, column = 'city_sector', alpha = .2)
        # categories = dir_list pins every sector to its own slot in my_cmap.
        # Without it the colormap is spread over only the sectors present in
        # this street, so a sector's color would change from plot to plot.
        # The legend consequently lists all sectors in dir_list.
        to_draw.plot(ax = ax1, column = 'city_sector', cmap = my_cmap,
                     categories = dir_list, linewidth = 5, legend = True)

        # label each sector's piece of the street; separate names are used so
        # the outer loop variables are not shadowed
        for sector_label, label_xy in zip(to_draw['city_sector'], to_draw['coords']):
            ax1.annotate(text = sector_label, xy = label_xy, fontsize = 16)

        ax1.set_axis_off()
        ax1.set_title(label = f"Street Name: {osn} {ost}")

        #plt.show()

        fig.savefig(fname = ofpn)
        # close this specific figure so figures do not pile up in memory
        plt.close(fig)

# manually review each plot to identify the groups within each street

In general, avenues run north-south and streets run east-west. 
Avenues can be grouped by:
* NE, E, CNTR, S  
* N, CNTR, S
* NW, W, SW

Streets can be grouped by:
* NW, N, NE
* W, N, CNTR, E
* SW, S

# BONUS: is every state name represented as a street?

In [72]:
# path to the CSV of state names (note: this reuses the name fpn from earlier)
fpn = os.path.join(rc.INPUT_FILE_PATH, 'state_name.csv')

In [73]:
# read the state names into a DataFrame
state_df = pd.read_csv(filepath_or_buffer=fpn)

In [74]:
state_df.head()

Unnamed: 0,Name
0,Alabama
1,Alaska
2,Arizona
3,Arkansas
4,California


In [75]:
# build a set of the state names after cleaning and converting to lower case
state_name_set = {state_name.strip().lower() for state_name in state_df['Name']}

In [76]:
len(state_name_set)

51

In [77]:
# do the same for the street name after dropping the streets that start with a number.
# `~` is the boolean negation operator for a pandas mask; unary `-` is
# arithmetic negation and is not the documented way to invert booleans
starts_with_digit = gdf['ord_street_name'].str[0].isin(list('0123456789'))
street_name_set = set(gdf.loc[~starts_with_digit, 'ord_street_name'].str.lower().unique().tolist())

In [78]:
len(street_name_set)

1040

In [79]:
# state names that also appear as a street name
test_intersection = state_name_set & street_name_set

In [80]:
len(test_intersection)

20

In [81]:
# so, 20 states are represented... 

In [82]:
test_intersection

{'alaska',
 'california',
 'colorado',
 'florida',
 'idaho',
 'illinois',
 'iowa',
 'kansas',
 'maryland',
 'massachusetts',
 'michigan',
 'montana',
 'nebraska',
 'nevada',
 'ohio',
 'oregon',
 'texas',
 'utah',
 'virginia',
 'washington'}

In [83]:
# what's not represented? (state names with no matching street name)
state_name_set - street_name_set

{'alabama',
 'arizona',
 'arkansas',
 'connecticut',
 'delaware',
 'district of columbia',
 'georgia',
 'hawaii',
 'indiana',
 'kentucky',
 'louisiana',
 'maine',
 'minnesota',
 'mississippi',
 'missouri',
 'new hampshire',
 'new jersey',
 'new mexico',
 'new york',
 'north carolina',
 'north dakota',
 'oklahoma',
 'pennsylvania',
 'rhode island',
 'south carolina',
 'south dakota',
 'tennessee',
 'vermont',
 'west virginia',
 'wisconsin',
 'wyoming'}

In [84]:
# what about states with directions in the name and DC?

In [85]:
'dakota' in street_name_set

True

In [86]:
'columbia' in street_name_set

True

In [87]:
'virginia' in street_name_set

True

In [88]:
'carolina' in street_name_set

True

In [89]:
# if we count Dakota as ND and SD
# columbia as Washington DC
# Virginia as both VA and West Virginia
# Carolina as both NC and SC
# that brings us to: 26. 
# so, over half. That's kind of cool.