In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?
# step 02: export street nodes

In [2]:
# standard
import os

In [3]:
# external
from itertools import combinations, product
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import LineString, Point
from shapely import line_merge


In [4]:
# custom
from geodataio.geo_operations import points2distance, calculate_initial_compass_bearing

# load the streetnetwork

In [5]:
# file path
file_path = 'H:/project/seattle_streets/data/' 

In [6]:
file_name = 'Street_Network_Database_Complete.gpkg'

In [7]:
fpn = os.path.join(file_path, file_name)

In [8]:
gdf = gpd.read_file(filename = fpn)

In [9]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry'],
      dtype='object')

# GENERATE STREET END VERTICES - THESE WILL BE HELPFUL WITH IDENTIFICATION IN SUBSEQUENT STEPS

In [10]:
def generate_street_end_vertices(gdf:gpd.GeoDataFrame):    
    
    
    # export the vertex of each street end. 
    gdf['start_coord'] = gdf['geometry'].map(lambda x: x.coords[0])
    
    gdf['end_coord'] = gdf['geometry'].map(lambda x: x.coords[-1])
    
    f_node_gdf = gdf[['f_intr_id', 'start_coord']].copy().rename(columns = {'f_intr_id':'node_id',
                                                                           'start_coord':'coord'})
    t_node_gdf = gdf[['t_intr_id', 'end_coord']].copy().rename(columns = {'t_intr_id':'node_id',
                                                                         'end_coord':'coord'})
    
    node_df = pd.concat(objs = [f_node_gdf, t_node_gdf]).drop_duplicates()
    
    # make a gdf
    node_gdf = gpd.GeoDataFrame(data = node_df, geometry =
                                node_df['coord'].map(lambda x: Point(x)), crs = 4326)
    
    node_gdf = node_gdf.drop(labels = ['coord'], axis = 1)

    # remove the start and end coordinates
    gdf = gdf.drop(labels = ['start_coord', 'end_coord'], axis = 1)
    
    return gdf, node_gdf

In [11]:
# can we generate the vertices? yes... but let's check what type of geometry we are working with
gdf['geometry'].map(lambda x: x.geom_type).value_counts()

geometry
MultiLineString    34378
Name: count, dtype: int64

In [12]:
# everything is a multiline string??? is that necessary?

In [13]:
test_gdf = gdf.explode()

In [14]:
test_gdf.shape

(34378, 38)

In [15]:
# nope! Let's use the .explode() function to convert things to a LineString

In [16]:
gdf = test_gdf.copy()

In [17]:
gdf['geometry'].geom_type.value_counts()

LineString    34378
Name: count, dtype: int64

In [18]:
gdf, node_gdf = generate_street_end_vertices(gdf = gdf)

In [19]:
output_file_name = 'Street_Network_Nodes.gpkg'

In [20]:
ofpn = os.path.join(file_path, output_file_name)

In [21]:
node_gdf.to_file(filename = ofpn)

# import the classified streets - this classifies streets without a classification. 

In [22]:
file_name = 'blank_street_type_modified.xlsx'

In [23]:
fpn = os.path.join(file_path, file_name)

In [24]:
blank_street_type_df = pd.read_excel(io = fpn)

In [25]:
blank_street_type_df.head()

Unnamed: 0,ord_stname_concat,ord_st_type
0,NE SUNRISE VISTA,ST
1,NW ESPLANADE,ST
2,BROADWAY E,ST
3,FAUNTLEE CREST SW,ST
4,NE FOREST VISTA,ST


In [26]:
gdf['ord_street_type'].unique()

array(['AVE', 'CT', 'ST', 'PL', 'LN', 'RD', 'DR', 'WAY', 'PKWY', 'BLVD',
       'CIR', 'TER', None, 'AL', 'RP', 'LOOP', 'OP', 'TRL', 'BR', 'ET',
       'VIEW', 'HWY', 'PZ', 'MALL'], dtype=object)

In [27]:
gdf = pd.merge(left = gdf, right = blank_street_type_df, how = 'left')

In [28]:
gdf.loc[gdf['ord_st_type'].isna(), 'ord_st_type'] = gdf.loc[gdf['ord_st_type'].isna(), 'ord_street_type']

In [29]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry',
       'ord_st_type'],
      dtype='object')

In [30]:
gdf['ord_st_type'].unique()

array(['AVE', 'CT', 'ST', 'PL', 'LN', 'RD', 'DR', 'WAY', 'PKWY', 'BLVD',
       'CIR', 'TER', 'AL', 'RP', 'LOOP', 'SR', 'IS', 'OP', 'TRL', 'RR',
       'BR', 'WKWY', 'ET', 'STCR', 'VIEW', 'FLYOVER', 'HWY', 'RN', 'PZ',
       'MALL'], dtype=object)

# keep only streets in Seattle

In [31]:
gdf = gdf.loc[(gdf['l_city'] == 'SEATTLE') |
(gdf['r_city'] == 'SEATTLE'), :].copy()

# WRITE THE FULL SEATTLE STREETS TO DISK

In [32]:
output_file_name = 'Street_Network_Database_Seattle_Full.gpkg'

In [33]:
ofpn = os.path.join(file_path, output_file_name)

In [34]:
gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

# EXPORT THE INDIVIDUAL STREET PORTIONS TO MAKE THE DIFFERENT SECTIONS

In [35]:
# after performing some analysis in GIS, we can quickly identify the central 

In [36]:
file_name = 'Street_Network_Database_Seattle_Central_Streets.csv'

In [37]:
fpn = os.path.join(file_path, file_name)

In [38]:
cs_df = pd.read_csv(filepath_or_buffer=fpn)

In [39]:
cs_df = cs_df.drop(labels = ['fid'], axis =  1)

In [40]:
# working_gdf
# winnow down the list of streets to get what we're looking for 

q_str = 'snd_feacode in (1, 5) and st_code in (0, ) and city_portion != \'WB\''
wgdf = gdf.query(expr = q_str)

In [41]:
output_file_path = 'H:/project/seattle_streets/data/city_section'

if not os.path.exists(output_file_path):
    os.makedirs(output_file_path)

data_list = []
geom_list = []
for cp in wgdf['city_portion'].unique().tolist():
    print(cp)
    if cp == ' ':
        temp_geoms = wgdf.loc[(wgdf['snd_id'].isin(cs_df['snd_id'])) &
        (wgdf['city_portion'] == cp), 'geometry']
    else:
        temp_geoms = wgdf.loc[wgdf['city_portion'] == cp, 'geometry']
    
    
    # # create the concave hull
    # point_cloud = []
    # for tg in temp_geoms:
    #     for coords in tg.coords:
    #         point_cloud.append(Point(coords))
    # point_cloud = shapely.unary_union(geometries = point_cloud)
    # ccv_hull = shapely.concave_hull(geometry = point_cloud)    
    # temp_list = [cp, 'concave']
    # data_list.append(temp_list)
    # geom_list.append(ccv_hull)

    # create the convex hull
    geoms = shapely.unary_union(geometries = temp_geoms)
    cvx_hull = shapely.convex_hull(geometry = geoms)
    temp_list = [cp, 'convex']
    data_list.append(temp_list)
    geom_list.append(cvx_hull)   
    
    
    
    

S
E
NW
N
 
NE
SW
W


In [42]:
output_gdf = gpd.GeoDataFrame(data = data_list, columns = ['city_portion', 'hull_type'], geometry = geom_list, crs = 'epsg:4326')

In [43]:
output_file_path = 'H:/project/seattle_streets/data/'

In [44]:
file_name = 'city_sections.gpkg'

In [45]:
ofpn = os.path.join(output_file_path, file_name)

In [46]:
output_gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

# REMOVE "STREETS" THAT ARE REALLY SHORT TURNOUTS AND WALKING PATHS

AL: Alley
TRL: Trail
CIR: Circle
TER: Terrace
OP: Overpass
RP: Ramp
ET: Extension
RN: TURN




In [47]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry',
       'ord_st_type'],
      dtype='object')

In [59]:
# remove streets of the following type
street_type_to_remove = [ 'AL', 'TRL', 'OP', 'IS', 'SR', 'RR', 'FLYOVER', 'STCR', 'ET', 'RN', 'RP']

In [60]:
gdf.shape

(27405, 40)

In [61]:
gdf = gdf.loc[-gdf['ord_street_type'].isin(street_type_to_remove), :].copy()

In [62]:
gdf.shape

(27405, 40)

In [63]:
gdf['ord_street_type'].unique()

array(['AVE', 'ST', 'PL', 'LN', 'RD', 'WAY', 'PKWY', 'DR', 'CT', 'BLVD',
       'CIR', 'TER', None, 'BR', 'VIEW', 'LOOP', 'PZ', 'MALL'],
      dtype=object)

# LABEL STREET SUB-GROUPS

In [64]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry',
       'ord_st_type', 'snd_group'],
      dtype='object')

In [65]:
def identify_street_groups(gdf:gpd.GeoDataFrame, s_names:list = None):
    
    if s_names is None:
        s_names = gdf['ord_stname_concat'].unique().tolist()

    gdf['snd_group'] = int(0)
    
    snd_group_count = -1
    for sn in s_names:
        # get a street by name
        print(sn)
        # subset the gdf
        temp_gdf = gdf.loc[gdf['ord_stname_concat'] == sn, :].copy()        
        
        # build a graph to find (dis)connected components: this is an easy way to find portions of a street that are not connected to each other
        g = nx.from_pandas_edgelist(df = temp_gdf, source = 'f_intr_id', target = 't_intr_id', edge_attr=True)
        # do I need to save the graph?
        n_connected_components = nx.number_connected_components(g)        
        
        for c in nx.connected_components(g):
            # c is the sub graph
            # this is the edge data
            edge_data_list = g.subgraph(c).edges.data()
            # this marks the street(s) in each sub group
            snd_id_list = []
            for edl in edge_data_list:                        
                snd_id_list.append(edl[2]['snd_id'])
            snd_group_count += 1
            # this will update the gdf per street name with the groups of streets
            gdf.loc[gdf['snd_id'].isin(snd_id_list), 'snd_group'] = snd_group_count 

    return gdf

    

In [66]:
gdf = identify_street_groups(gdf = gdf)

8TH AVE S
BELMONT AVE E
E PIKE ST
NW 63RD ST
MCCLINTOCK AVE S
NORTH PARK AVE N
8TH AVE
37TH AVE S
25TH AVE NE
S WAITE ST
NE 107TH ST
NW 42ND ST
LINDEN AVE N
NE 40TH ST
FAUNTLEROY PL SW
W GALER ST
SW WALKER ST
20TH AVE NW
S BYRON ST
S FOREST ST
VICTORY LN NE
18TH AVE SW
26TH AVE NW
29TH AVE S
27TH AVE NE
17TH AVE NW
KIRKWOOD PL N
7TH AVE NW
NE 45TH ST
ROWAN RD S
28TH AVE SW
S 130TH ST
38TH AVE E
E MARION ST
S HOLLY ST
N 95TH ST
E OLIVE ST
E LOUISA ST
E REPUBLICAN ST
15TH AVE SW
CEDAR ST
5TH AVE W
45TH AVE NE
W RAYE ST
S DAKOTA ST
NE 91ST ST
NW 70TH ST
18TH AVE NE
48TH AVE SW
S 102ND ST
W FULTON ST
NE 68TH ST
W RUFFNER ST
35TH AVE
46TH AVE SW
8TH AVE NE
CHERRY ST
21ST AVE S
37TH AVE W
ALKI AVE SW
DAYTON AVE N
SW 102ND ST
N 90TH ST
SW ANDOVER ST
FREMONT AVE N
THOMAS ST
27TH AVE W
WESTERN AVE
SW DAKOTA ST
STONE AVE N
NW 94TH ST
NE 123RD ST
24TH AVE W
NE 130TH PL
NW 62ND ST
48TH AVE S
3RD AVE NE
WESTMONT WAY W
37TH AVE SW
FAIRVIEW AVE E
NE 55TH ST
46TH AVE S
NW 105TH ST
47TH AVE SW
28TH AVE

In [67]:
gdf.head()

Unnamed: 0,f_intr_id,t_intr_id,snd_id,snd_feacode,citycode,stname_id,st_code,arterial_code,segment_type,agency_code,...,r_zip,sndseg_update,compkey,comptype,unitid,unitid2,city_portion,geometry,ord_st_type,snd_group
0,17740,17791,33478,5,1,566,0,1,1,1,...,98108,2007-04-22 00:00:00+00:00,8460,68,4805,820,S,"LINESTRING (-122.32287 47.52982, -122.32289 47...",AVE,0
1,11391,11295,20097,1,1,659,0,0,1,1,...,98102,2007-04-22 00:00:00+00:00,9236,68,5465,10,E,"LINESTRING (-122.32402 47.61849, -122.32403 47...",AVE,11
3,11695,11696,20460,77,1,1444,20,0,6,1,...,98122,2004-05-19 00:00:00+00:00,0,0,0,0,E,"LINESTRING (-122.30782 47.61411, -122.30749 47...",ST,12
4,6257,6252,9237,1,1,2086,0,0,1,1,...,98107,2012-10-12 00:00:00+00:00,18575,68,14630,200,NW,"LINESTRING (-122.38214 47.67456, -122.38476 47...",ST,15
6,14191,14281,29554,1,1,1051,0,0,1,1,...,98144,2007-04-22 00:00:00+00:00,11712,68,8545,320,S,"LINESTRING (-122.29249 47.57475, -122.29171 47...",AVE,16


# SAVE AS A GDF - OVERWRITE

In [68]:
# file path
output_file_name = 'Street_Network_Database_Seattle_working.gpkg'

In [69]:
ofpn = os.path.join(file_path, output_file_name)

In [70]:
gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)