In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?

In [2]:
# standard
import os

In [3]:
# external
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd

# load the shapefile as a gpd

In [4]:
# file path
file_path = 'H:/project/seattle_streets/data/WGS84/' 

In [5]:
file_name = 'Street_Network_Database.shp'

In [6]:
fpn = os.path.join(file_path, file_name)

In [7]:
gdf = gpd.read_file(filename = fpn)

In [8]:
gdf.columns = [cn.lower() for cn in gdf.columns]

In [9]:
# create a network from the dataframe

In [10]:
col_names = ['f_intr_id', 't_intr_id', 'snd_id', 'ord_street', 'ord_stname', 'l_city', 'r_city']

In [11]:
df = gdf[col_names].copy()

In [43]:
df.head()

Unnamed: 0,f_intr_id,t_intr_id,snd_id,ord_street,ord_stname,l_city,r_city
0,13827,13823,26941,STEVENS,SW STEVENS ST,SEATTLE,SEATTLE
1,8721,8722,11377,37TH,N 37TH ST,SEATTLE,SEATTLE
2,3606,3608,6656,110TH,NE 110TH ST,SEATTLE,SEATTLE
3,3784,3767,6770,106TH,NE 106TH ST,SEATTLE,SEATTLE
4,14373,14456,27361,56TH,56TH AVE SW,SEATTLE,SEATTLE


In [12]:
# unique street names 
s_names = df['ord_stname'].unique().tolist()

In [13]:
len(s_names)

3650

In [14]:
testo = df.loc[df['ord_stname'].str.find('GALER') > -1, 'ord_stname'].unique()

In [15]:
testo

array(['W GALER ST', 'GALER ST', 'E GALER ST', 'GALER PL N',
       'W GALER ST FLYOVER'], dtype=object)

In [16]:
s_names_test = ['W GALER ST']

In [68]:
gdf['ord_stre_1'].isna().value_counts()

ord_stre_1
False    33396
True       661
Name: count, dtype: int64

In [69]:
testo = gdf.loc[gdf['ord_stre_1'].isna(), :]

In [71]:
testo['ord_stname'].unique()

array(['I5 SB', 'BROADWAY E', 'NW ESPLANADE', 'FAUNTLEE CREST SW',
       'SR518', 'NE FOREST VISTA', 'I405', 'NE URBAN VISTA',
       'PIKE HILL CLIMB', 'SR509', 'UW WALKWAY', 'BATTERY ST TUNNEL NB',
       'GOVERNMENT LOCKS', 'W THOMAS ST OVERPASS', 'NP RR',
       'BALLARD TERMINAL RR', 'CMSP AND P RR', 'LINK LIGHT RAIL', 'BN RR',
       'WATERFRONT TROLLEY', 'UP RR', 'ALKI - HALLECK WKWY', 'BROADWAY',
       'W QUEEN ANNE DRIVEWAY', '59TH AV SW TURN', 'W GALER ST FLYOVER',
       'FIRST HILL STREETCAR', 'S LK UNION STREETCAR', 'RAINIER VISTA NE',
       'BLAKELY - 12TH WKWY', 'SW 98TH ST WKWY', 'SR522',
       '17TH AVE NW WKWY', 'S BRADFORD ST WKWY', 'ALLEY N OF GLENWILDE',
       'MONORAIL', 'WELLER ST WALKWAY', '21ST AV SW TURN',
       'I90 WB TUNNEL', 'I90 EXPRESS TUNNEL', 'I90 EB TUNNEL',
       'BATTERY ST TUNNEL SB', 'I5 NB', 'SR509 SB', 'SR599 NB',
       'SR509 NB', 'AIRPORT FY SB', 'SR599 SB', 'AIRPORT FY NB',
       'SR518 EB', 'SR518 WB', 'SR520 EB', 'I90 WB', 'I90 EB'

In [72]:
gdf.loc[gdf['ord_stre_1'].isna(), 'ord_stre_1', ] = 'misc'

In [73]:
output_list = []
output_gdf_list = []
for sn in s_names:
    print(sn)
    temp_gdf = gdf.loc[gdf['ord_stname'] == sn, ['f_intr_id', 't_intr_id', 'ord_stre_1', 'ord_stname', 'geometry']].copy()
    g = nx.from_pandas_edgelist(df = temp_gdf, source = 'f_intr_id', target = 't_intr_id', edge_attr=True)
    n_connected_components = nx.number_connected_components(g)
    temp_list = [sn, n_connected_components]
    output_list.append(temp_list)
    if n_connected_components > 1:
        # dissolve this lovely
        diss_gdf = temp_gdf.dissolve(by = 'ord_stname')
        diss_gdf['n_parts'] = n_connected_components
        output_gdf_list.append(diss_gdf)
        # dissolve the geometry
        
        

SW STEVENS ST
N 37TH ST
NE 110TH ST
NE 106TH ST
56TH AVE SW
S 132ND ST
S MORGAN ST
68TH AVE S
NE 183RD CT
NE 162ND ST
14TH AVE NE
35TH AVE W
E FIR ST
S EDMUNDS ST
WELLINGTON AVE
41ST AVE E
NW 110TH ST
E OLIVE ST
E HARRISON ST
E CONOVER CT
PIKE PL
BAKER AVE NW
DAYTON AVE N
20TH AVE NE
RAYE ST
EARL AVE NW
LETITIA AVE S
NE 200TH ST
2ND AVE NW
S 127TH ST
1ST AVE S
SW 154TH ST
21ST AVE SW
W BOTHWELL ST
SW TRENTON ST
N 48TH ST
E THOMAS ST
NE 138TH ST
N 38TH ST
9TH AVE NE
WOODLAWN AVE N
SW WINTHROP ST
E SENECA ST
CORLISS AVE N
51ST AVE SW
32ND AVE NE
LINDEN AVE N
WHITMAN AVE N
STONE AVE N
3RD AVE NE
S 100TH ST
NE 165TH ST
NE 147TH ST
49TH AVE SW
NE 177TH ST
CRESTWOOD DR S
N 87TH ST
SW FRONTENAC ST
NW MARKET ST
N 141ST ST
W GALER ST
NE 60TH ST
NE 77TH ST
S ALASKA ST
SW ADMIRAL WAY
S LUCILE ST
51ST AVE S
33RD AVE S
11TH AVE
27TH AVE
38TH AVE S
4TH AVE N
28TH AVE S
SW 159TH ST
8TH AVE NE
82ND AVE S
FREMONT AVE N
SW PRINCE ST
NW 55TH PL
S BENNETT ST
E ALDER ST
S HUDSON ST
S CHARLES ST
HARVARD AVE

In [74]:
con_df = pd.DataFrame(data = output_list, columns = ['ord_stname', 'n_parts'])

In [75]:
con_df = con_df.sort_values(by = ['n_parts'], ascending = False)

In [76]:
con_df.head()

Unnamed: 0,ord_stname,n_parts
96,1ST AVE NW,25
28,2ND AVE NW,24
234,PALATINE AVE N,21
1331,35TH AVE S,21
847,DENSMORE AVE N,20


In [77]:
con_df['n_parts'].describe()

count    3650.000000
mean        2.428219
std         2.727894
min         1.000000
25%         1.000000
50%         1.000000
75%         3.000000
max        25.000000
Name: n_parts, dtype: float64

In [78]:
my_gdf = pd.concat(objs = output_gdf_list, axis = 0)

In [79]:
my_gdf.shape

(1542, 5)

In [80]:
output_file_path = 'H:/project/seattle_streets/data/'

In [81]:
output_file_name = 'multi_part_streets.gpkg'

In [82]:
ofpn = os.path.join(output_file_path, output_file_name)

In [83]:
my_gdf.to_file(filename = ofpn, driver = 'GPKG')