In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?
# step 02: export street nodes

In [2]:
# standard
import os

In [3]:
# external
from itertools import combinations, product
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point
from shapely import line_merge

In [4]:
# custom
from geodataio.geo_operations import points2distance, calculate_initial_compass_bearing

# load the geopackage as a geopandas GeoDataFrame

In [5]:
# file path - directory holding the input and output files.
# set the SEATTLE_STREETS_DATA_PATH environment variable to run this notebook
# against a different directory; the original location is the default.
file_path = os.environ.get('SEATTLE_STREETS_DATA_PATH', 'H:/project/seattle_streets/data/')

In [6]:
# name of the street network geopackage inside file_path
file_name = 'Street_Network_Database.gpkg'

In [7]:
# full path to the input file: directory + file name
fpn = os.path.join(file_path, file_name)

In [8]:
# read the street network; each row is one street segment with its geometry
gdf = gpd.read_file(filename = fpn)

In [9]:
# inspect the columns delivered with the source data
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry'],
      dtype='object')

# GENERATE STREET END VERTICES - THESE WILL BE HELPFUL WITH IDENTIFICATION IN SUBSEQUENT STEPS

In [10]:
def generate_street_end_vertices(gdf:gpd.GeoDataFrame):
    """Build a point layer from the first and last vertex of every street segment.

    The first coordinate of each geometry is paired with ``f_intr_id`` and the
    last coordinate with ``t_intr_id``. Duplicate (node id, coordinate) pairs
    are dropped, so a node id appears more than once only when it is paired
    with more than one distinct coordinate.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Street segments with ``f_intr_id``, ``t_intr_id`` and ``geometry``
        columns. Every geometry must expose ``.coords`` (e.g. LineString).
        The frame is not modified.

    Returns
    -------
    tuple
        ``(gdf, node_gdf)`` where ``gdf`` is the input frame, untouched, and
        ``node_gdf`` is a GeoDataFrame with a ``node_id`` column and a Point
        geometry column.
    """

    # pull the end vertices into standalone series so the caller's frame never
    # receives temporary columns
    start_coord = gdf['geometry'].map(lambda x: x.coords[0])
    end_coord = gdf['geometry'].map(lambda x: x.coords[-1])

    f_node_df = pd.DataFrame(data = {'node_id': gdf['f_intr_id'], 'coord': start_coord})
    t_node_df = pd.DataFrame(data = {'node_id': gdf['t_intr_id'], 'coord': end_coord})

    # coordinates are tuples, so they are hashable and can be de-duplicated
    node_df = pd.concat(objs = [f_node_df, t_node_df]).drop_duplicates()

    # the coordinates come straight from gdf's geometries, so the nodes share
    # its crs; 4326 is kept as the fallback when the input carries no crs
    node_crs = gdf.crs if gdf.crs is not None else 4326

    # make a gdf
    node_gdf = gpd.GeoDataFrame(data = node_df,
                                geometry = node_df['coord'].map(Point),
                                crs = node_crs)

    # the raw coordinate tuples are redundant once the Point geometry exists
    node_gdf = node_gdf.drop(labels = ['coord'], axis = 1)

    return gdf, node_gdf

In [11]:
# can we generate the vertices? yes... but let's check what type of geometry we are working with.
# the vertex extraction reads coords[0] and coords[-1] of each geometry, which needs
# single-part geometries such as LineString
gdf['geometry'].map(lambda x: x.geom_type).value_counts()

LineString    34057
Name: count, dtype: int64

In [12]:
# node_gdf holds one point per distinct (node id, coordinate) pair found at the segment ends
gdf, node_gdf = generate_street_end_vertices(gdf = gdf)

In [13]:
# name of the geopackage that will hold the street end nodes
output_file_name = 'Street_Network_Nodes.gpkg'

In [14]:
# full path to the node output file: directory + file name
ofpn = os.path.join(file_path, output_file_name)

In [15]:
# export the street end nodes; name the driver and skip the index explicitly,
# the same way the street export at the end of this notebook does
node_gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

In [16]:
# import the street type lookup - this assigns a type to streets that have no type in the source data.

In [17]:
# NOTE: file_name is re-used here; from this point on it names the street type workbook
file_name = 'blank_street_type_modified.xlsx'

In [18]:
# NOTE: fpn is re-used as well; it now points at the street type workbook
fpn = os.path.join(file_path, file_name)

In [19]:
# read the lookup of street name (ord_stname) to street type (ord_st_type)
blank_street_type_df = pd.read_excel(io = fpn)

In [20]:
# preview the lookup table
blank_street_type_df.head()

Unnamed: 0,ord_stname,ord_st_type
0,I5 SB,IS
1,BROADWAY E,ST
2,NW ESPLANADE,ST
3,FAUNTLEE CREST SW,ST
4,SR518,SR


In [21]:
# street types present in the source data - look for missing (None) entries
gdf['ord_street_type'].unique()

array(['ST', 'AVE', 'CT', 'PL', 'DR', 'WAY', 'BLVD', 'LN', 'TRL', 'RD',
       'CIR', None, 'PKWY', 'TER', 'AL', 'VI', 'OP', 'RP', 'BR', 'LOOP',
       'HWY', 'ET', 'VIEW', 'PZ', 'MALL'], dtype=object)

In [22]:
# attach the looked-up street type to every segment.
# the join key is spelled out: without `on`, pandas joins on every column the two
# frames share, which silently changes if either frame gains a column.
n_rows_before_merge = len(gdf)
gdf = pd.merge(left = gdf, right = blank_street_type_df, how = 'left', on = 'ord_stname')
# a left join only adds rows when the lookup repeats a street name
assert len(gdf) == n_rows_before_merge, 'blank_street_type_df repeats an ord_stname: street segments were duplicated'

In [23]:
# segments the lookup did not cover keep the street type from the source data
missing_st_type = gdf['ord_st_type'].isna()
gdf.loc[missing_st_type, 'ord_st_type'] = gdf.loc[missing_st_type, 'ord_street_type']

In [24]:
# confirm that ord_st_type is now part of the street data
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry', 'ord_st_type'],
      dtype='object')

In [25]:
# street types after combining the lookup with the source data
gdf['ord_st_type'].unique()

array(['ST', 'AVE', 'CT', 'PL', 'DR', 'WAY', 'BLVD', 'LN', 'TRL', 'RD',
       'CIR', 'IS', 'PKWY', 'TER', 'AL', 'VI', 'OP', 'SR', 'WKWY', 'RP',
       'BR', 'LOOP', 'HWY', 'RR', 'ET', 'VIEW', 'FLYOVER', 'STCR', 'RN',
       'PZ', 'MALL'], dtype=object)

# REMOVE "STREETS" THAT ARE REALLY SHORT TURNOUTS AND WALKING PATHS

AL: Alley
TRL: Trail
CIR: Circle
TER: Terrace
OP: Overpass
RP: Ramp
ET: Extension
RN: Turn




In [26]:
# spot check: show the first few street names that carry a given type code
test_type = 'RN'
gdf.loc[gdf['ord_st_type'] == test_type, 'ord_stname'].head()

30019    21ST AV SW TURN
Name: ord_stname, dtype: object

In [27]:
# remove streets of the following type
# AL, TRL, OP, ET, RN and RP are described in the legend above (CIR and TER are
# described there too but are kept). IS, SR, RR, FLYOVER and STCR are not in the
# legend - presumably interstate, state route, railroad, flyover and streetcar (TODO confirm)
street_type_to_remove = [ 'AL', 'TRL', 'OP', 'IS', 'SR', 'RR', 'FLYOVER', 'STCR', 'ET', 'RN', 'RP']

In [28]:
# row and column count before the street types are removed
gdf.shape

(34057, 39)

In [29]:
# keep every segment whose type is not in the removal list; `~` is the boolean
# NOT for a pandas mask. segments with a missing type are kept because isin()
# is False for missing values.
gdf = gdf.loc[~gdf['ord_st_type'].isin(street_type_to_remove), :].copy()

In [30]:
# row and column count after the street types are removed
gdf.shape

(32877, 39)

In [31]:
# keep only streets in Seattle

In [32]:
# look up the names of the city columns (l_city, r_city)
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry', 'ord_st_type'],
      dtype='object')

In [33]:
# a segment stays when the city on its left side or on its right side is SEATTLE
gdf = gdf.loc[gdf[['l_city', 'r_city']].eq('SEATTLE').any(axis = 1), :].copy()

In [34]:
# row and column count after keeping only the Seattle segments
gdf.shape

(26725, 39)

In [35]:
# LABEL STREET SUB-GROUPS

In [36]:
# street name followed by street type, without the direction (e.g. 'STEVENS ST').
# a segment missing either part gets a missing value here.
gdf['ord_street_st_type'] = gdf['ord_street'] + ' ' + gdf['ord_st_type']

In [37]:
def identify_street_groups(gdf:gpd.GeoDataFrame, s_names:list = None, verbose:bool = True):
    """Label the connected pieces of every street with a running group number.

    For each street name, the segments carrying that name are treated as the
    edges of a graph whose nodes are the intersection ids (``f_intr_id``,
    ``t_intr_id``). Every connected component of that graph is one unbroken
    piece of the street and receives the next group number, starting at 0 and
    counting up across all street names.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Street segments with ``ord_stname``, ``f_intr_id`` and ``t_intr_id``
        columns. A ``snd_group`` column is added to this frame in place.
    s_names : list, optional
        Street names to process. Defaults to every unique ``ord_stname``.
    verbose : bool, optional
        Print each street name as it is processed. Defaults to True.

    Returns
    -------
    gpd.GeoDataFrame
        The same frame, with ``snd_group`` filled in.

    Notes
    -----
    ``snd_group`` is initialised to 0, which is also the first group number.
    Rows whose name is not in ``s_names``, or whose name is missing, therefore
    look like members of group 0.
    """

    if s_names is None:
        s_names = gdf['ord_stname'].unique().tolist()

    gdf['snd_group'] = int(0)

    snd_group_count = -1
    for sn in s_names:
        if verbose:
            print(sn)

        # the segments of this street; only the two node columns are needed
        name_mask = gdf['ord_stname'] == sn
        temp_gdf = gdf.loc[name_mask, ['f_intr_id', 't_intr_id']]

        # build a graph to find (dis)connected components: this is an easy way
        # to find portions of a street that are not connected to each other
        g = nx.from_pandas_edgelist(df = temp_gdf, source = 'f_intr_id', target = 't_intr_id')

        # give every node the number of the component it belongs to
        node_group = {}
        for c in nx.connected_components(g):
            snd_group_count += 1
            for node in c:
                node_group[node] = snd_group_count

        if node_group:
            # label rows through their start node instead of through edge
            # attributes: the graph keeps a single edge per pair of nodes, so
            # two segments joining the same two intersections would otherwise
            # leave one segment unlabeled. both ends of a segment always sit in
            # the same component, so the start node alone decides the group.
            gdf.loc[name_mask, 'snd_group'] = temp_gdf['f_intr_id'].map(node_group).to_numpy()

    return gdf

    

In [38]:
# label every street name in the data; each name is printed as it is processed
gdf = identify_street_groups(gdf = gdf)

SW STEVENS ST
N 37TH ST
NE 110TH ST
NE 106TH ST
56TH AVE SW
S MORGAN ST
68TH AVE S
35TH AVE W
E FIR ST
S EDMUNDS ST
WELLINGTON AVE
41ST AVE E
NW 110TH ST
E OLIVE ST
E HARRISON ST
E CONOVER CT
PIKE PL
BAKER AVE NW
DAYTON AVE N
20TH AVE NE
RAYE ST
EARL AVE NW
LETITIA AVE S
S 127TH ST
W BOTHWELL ST
SW TRENTON ST
N 48TH ST
E THOMAS ST
NE 138TH ST
N 38TH ST
9TH AVE NE
WOODLAWN AVE N
SW WINTHROP ST
E SENECA ST
CORLISS AVE N
51ST AVE SW
32ND AVE NE
S 100TH ST
49TH AVE SW
CRESTWOOD DR S
N 87TH ST
SW FRONTENAC ST
NW MARKET ST
N 141ST ST
W GALER ST
NE 60TH ST
NE 77TH ST
S ALASKA ST
SW ADMIRAL WAY
S LUCILE ST
51ST AVE S
33RD AVE S
11TH AVE
27TH AVE
38TH AVE S
4TH AVE N
28TH AVE S
14TH AVE NE
LINDEN AVE N
82ND AVE S
FREMONT AVE N
SW PRINCE ST
NW 55TH PL
S BENNETT ST
E ALDER ST
S HUDSON ST
S CHARLES ST
HARVARD AVE
TROLL AVE N
SW BRANDON ST
E ARTHUR PL
E DENNY WAY
W THOMAS ST
NE 38TH ST
11TH AVE E
12TH AVE NW
46TH AVE S
2ND AVE NW
57TH AVE NE
26TH AVE
29TH AVE W
1ST AVE NW
S HILL ST
SW BARTON ST
S J

In [39]:
# check the new snd_group labels
gdf.head()

Unnamed: 0,f_intr_id,t_intr_id,snd_id,snd_feacod,citycode,stname_id,st_code,arterial_c,segment_ty,agency_cod,...,sndseg_upd,compkey,comptype,unitid,unitid2,city_portion,geometry,ord_st_type,ord_street_st_type,snd_group
0,13827,13823,26941,1,1,2734,0,0,1,1,...,2005-05-04,22863,68,18205,590,SW,"LINESTRING (-122.40808 47.57771, -122.40824 47...",ST,STEVENS ST,0
1,8721,8722,11377,1,1,1605,0,0,1,1,...,2005-05-04,15349,68,12005,240,N,"LINESTRING (-122.33029 47.65195, -122.32977 47...",ST,37TH ST,7
2,3606,3608,6656,5,1,1702,0,1,1,1,...,2005-05-04,16291,68,12865,380,NE,"LINESTRING (-122.28808 47.70834, -122.28674 47...",ST,110TH ST,8
3,3784,3767,6770,1,1,1697,0,0,1,1,...,2005-05-04,16261,68,12840,480,NE,"LINESTRING (-122.27712 47.70534, -122.27604 47...",ST,106TH ST,9
4,14373,14456,27361,1,1,459,0,0,1,1,...,2007-04-22,7763,68,4260,370,SW,"LINESTRING (-122.40387 47.57132, -122.40387 47...",AVE,56TH AVE,16


# SAVE AS A GEOPACKAGE - OVERWRITES THE INPUT FILE

In [40]:
# output file name
# NOTE: this is the same name as the input file read at the top of the notebook,
# so the export replaces the source data. re-running from the top afterwards
# starts from the already filtered and labeled streets, not from the original download.
output_file_name = 'Street_Network_Database.gpkg'

In [41]:
# full path to the street output file: directory + file name
ofpn = os.path.join(file_path, output_file_name)

In [42]:
# write the filtered, labeled streets as a geopackage without the index
gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)