In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?

In [2]:
# standard
import os

In [3]:
# external
from itertools import combinations, product
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point
from shapely import line_merge

In [4]:
# custom
from geodataio.geo_operations import points2distance, calculate_initial_compass_bearing

# load the street network GeoPackage as a GeoDataFrame

In [5]:
# file path
# The data directory can be overridden with the SEATTLE_STREETS_DATA_DIR
# environment variable so the notebook runs on machines without the H: drive;
# the original location stays the default.
file_path = os.environ.get('SEATTLE_STREETS_DATA_DIR', 'H:/project/seattle_streets/data/')

In [6]:
# name of the GeoPackage holding the street network
file_name = 'Street_Network_Database.gpkg'

In [7]:
# full path and name of the input file
fpn = os.path.join(file_path, file_name)

In [8]:
# read the street network into a GeoDataFrame
gdf = gpd.read_file(filename = fpn)

In [9]:
# list the columns available in the street network
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry'],
      dtype='object')

In [10]:
# import the classified streets

In [11]:
# name of the workbook holding the classified street types
file_name = 'blank_street_type_modified.xlsx'

In [12]:
# full path and name of the classified street type workbook
fpn = os.path.join(file_path, file_name)

In [13]:
# read the classified street types into a DataFrame
blank_street_type_df = pd.read_excel(io = fpn)

In [14]:
# preview the first rows of the classified street types
blank_street_type_df.head()

Unnamed: 0,ord_stname,ord_st_type
0,I5 SB,IS
1,BROADWAY E,ST
2,NW ESPLANADE,ST
3,FAUNTLEE CREST SW,ST
4,SR518,SR


In [15]:
# distinct values of the original street type column (includes missing values)
gdf['ord_street_type'].unique()

array(['ST', 'AVE', 'CT', 'PL', 'DR', 'WAY', 'BLVD', 'LN', 'TRL', 'RD',
       'CIR', None, 'PKWY', 'TER', 'AL', 'VI', 'OP', 'RP', 'BR', 'LOOP',
       'HWY', 'ET', 'VIEW', 'PZ', 'MALL'], dtype=object)

In [16]:
# Attach the classified street type (ord_st_type) to every segment.
# The join key is named explicitly: without `on`, pandas joins on every column
# the two frames happen to share, so a stray extra column in the workbook would
# silently change the join.
# NOTE(review): this assumes each ord_stname appears once in
# blank_street_type_df; duplicated names there would duplicate segments.
gdf = pd.merge(left = gdf, right = blank_street_type_df, how = 'left', on = 'ord_stname')

In [17]:
# where no classified type was merged in, fall back to the original street type
missing_st_type = gdf['ord_st_type'].isna()
gdf.loc[missing_st_type, 'ord_st_type'] = gdf.loc[missing_st_type, 'ord_street_type']

In [18]:
# confirm that the merge added the ord_st_type column
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry', 'ord_st_type'],
      dtype='object')

In [19]:
# distinct street types after filling in the missing ones
gdf['ord_st_type'].unique()

array(['ST', 'AVE', 'CT', 'PL', 'DR', 'WAY', 'BLVD', 'LN', 'TRL', 'RD',
       'CIR', 'IS', 'PKWY', 'TER', 'AL', 'VI', 'OP', 'SR', 'WKWY', 'RP',
       'BR', 'LOOP', 'HWY', 'RR', 'ET', 'VIEW', 'FLYOVER', 'STCR', 'RN',
       'PZ', 'MALL'], dtype=object)

# CLEAN UP

AL: Alley
TRL: Trail
CIR: Circle
TER: Terrace
OP: Overpass
RP: Ramp
ET: Extension
RN: Turn




In [20]:
# spot-check the street names carrying a given street type code
test_type = 'RN'
gdf.loc[gdf['ord_st_type'] == test_type, 'ord_stname'].head()

30019    21ST AV SW TURN
Name: ord_stname, dtype: object

In [21]:
# street type codes that are dropped from the analysis
street_type_to_remove = [
    'AL',
    'TRL',
    'OP',
    'IS',
    'SR',
    'RR',
    'FLYOVER',
    'STCR',
    'ET',
    'RN',
    'RP',
]

In [22]:
# row and column counts before removing street types
gdf.shape

(34057, 39)

In [23]:
# Keep only the segments whose street type is NOT in the removal list.
# `~` is the boolean-inversion operator for masks; unary minus on a boolean
# mask is rejected by NumPy and only works on a pandas Series through a
# special case.
gdf = gdf.loc[~gdf['ord_st_type'].isin(street_type_to_remove), :].copy()

In [24]:
# row and column counts after removing street types
gdf.shape

(32877, 39)

In [25]:
# keep only streets in Seattle

In [26]:
# list the columns again to locate the city fields used for the Seattle filter
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry', 'ord_st_type'],
      dtype='object')

In [27]:
# a segment is kept only when both its left and right side are in Seattle
both_sides_in_seattle = gdf[['l_city', 'r_city']].eq('SEATTLE').all(axis = 1)
gdf = gdf.loc[both_sides_in_seattle, :].copy()

In [28]:
# row and column counts after keeping only Seattle streets
gdf.shape

(26596, 39)

In [29]:
# PASS 1: FOCUS ON JUST THE STREETS, REGARDLESS OF DIRECTION AND TYPE

In [30]:
# street name plus street type, separated by a single space
street_with_type = gdf['ord_street'].add(' ').add(gdf['ord_st_type'])
gdf['ord_street_st_type'] = street_with_type

In [31]:
# one row per distinct full street name, keeping the first occurrence
is_first_occurrence = ~gdf['ord_stname'].duplicated()
agg_df = gdf.loc[is_first_occurrence, ['ord_stname']].copy()

In [32]:
# preview the unique street names
agg_df.head()

Unnamed: 0,ord_stname
0,SW STEVENS ST
1,N 37TH ST
2,NE 110TH ST
3,NE 106TH ST
4,56TH AVE SW


In [37]:
# every unique street name, as a plain Python list
s_names = agg_df['ord_stname'].to_list()

In [44]:
# NOTE(review): debugging override - this replaces the full list of street
# names built from agg_df with a single street. Skip or remove this cell to
# process every street.
s_names = ['BEACON AVE S']

In [45]:
# For every street name:
#   1. group its segments into connected pieces (shared intersections),
#   2. dissolve each piece into a single LineString,
#   3. if there is more than one piece, walk the pieces end to end and record
#      the straight line that bridges each gap (a "missing segment").
# The results are collected in three lists and written out after the loop.

# missing (bridging) segments, one GeoDataFrame per street with gaps
output_gdf_list = []
# dissolved pieces of every street
ex_gdf_list = []
# dissolved pieces, only for streets that needed the extra explode step
ex_ex_gdf_list = []
for sn in s_names:
    # get a street by name
    print(sn)
    # subset the gdf
    temp_gdf = gdf.loc[gdf['ord_stname'] == sn, :].copy()

    # build a graph of intersections to find connected components
    g = nx.from_pandas_edgelist(df = temp_gdf, source = 'f_intr_id', target = 't_intr_id', edge_attr = True)

    # a road group id: number the connected components and label every
    # intersection with the number of the component it belongs to.
    # Segments are labelled through their from-intersection rather than through
    # the edge attributes: an undirected nx.Graph keeps a single edge per pair
    # of intersections, so two segments joining the same pair would otherwise
    # lose one snd_id and be left in group 0.
    snd_group_count = -1
    node_group = {}
    for component_nodes in nx.connected_components(g):
        snd_group_count += 1
        for node in component_nodes:
            node_group[node] = snd_group_count
    # intersections that cannot be matched (e.g. missing ids) default to group 0
    temp_gdf['snd_group'] = [node_group.get(node, 0) for node in temp_gdf['f_intr_id']]

    # now, dissolve by the ord_street_st_type and snd_group
    col_names = ['ord_street_st_type', 'ord_street', 'ord_st_type', 'ord_stname', 'snd_group', 'geometry']
    diss_gdf = temp_gdf[col_names].dissolve(by = col_names[:-1], as_index = False)
    # create single linestrings!
    diss_gdf['geometry'] = diss_gdf['geometry'].map(line_merge)
    diss_gdf['geom_type'] = diss_gdf['geometry'].map(lambda geom: geom.geom_type)

    # blow up the multi-line strings: a piece that could not be merged into one
    # line is split into its parts and each part becomes its own group
    if (diss_gdf['geom_type'] == 'MultiLineString').any():
        # move past the last group id used so far so the new ids do not collide
        snd_group_count += 1
        # the multilinestrings
        mls_gdf = diss_gdf.loc[diss_gdf['geom_type'] == 'MultiLineString', :].copy()
        # the linestrings
        ls_gdf = diss_gdf.loc[diss_gdf['geom_type'] == 'LineString', :].copy()
        # explode
        mls_gdf = mls_gdf.explode(index_parts = False)

        # merge lines
        mls_gdf['geometry'] = mls_gdf['geometry'].map(line_merge)
        # get line types
        mls_gdf['geom_type'] = mls_gdf['geometry'].map(lambda geom: geom.geom_type)

        # assign a fresh group id to every exploded part
        mls_gdf['snd_group'] = range(snd_group_count, snd_group_count + mls_gdf.shape[0])
        # concatenate
        diss_gdf = pd.concat(objs = [ls_gdf, mls_gdf])

        diss_gdf = diss_gdf.reset_index(drop = True)
        ex_ex_gdf_list.append(diss_gdf.copy())

    # add the dissolved gdf to the output list
    ex_gdf_list.append(diss_gdf.copy())

    # get start and end coords of every piece
    diss_gdf['sp_coords'] = diss_gdf['geometry'].map(lambda geom: geom.coords[0])
    diss_gdf['ep_coords'] = diss_gdf['geometry'].map(lambda geom: geom.coords[-1])
    # add ids: start points are numbered 0..n-1, end points n..2n-1
    n_pieces = diss_gdf.shape[0]
    diss_gdf['sp_id'] = range(n_pieces)
    diss_gdf['ep_id'] = range(n_pieces, n_pieces * 2)

    # node id -> coordinates
    node_dict = {}
    for my_row in diss_gdf.itertuples():
        node_dict[my_row.sp_id] = my_row.sp_coords
        node_dict[my_row.ep_id] = my_row.ep_coords

    if n_pieces > 1:
        # we can't assume that start and end point are ordered in any way.
        # so we need to do all product pairs

        # build the simple graph: one edge per piece, joining its two end nodes
        g = nx.from_pandas_edgelist(df = diss_gdf, source = 'sp_id', target = 'ep_id', edge_attr = True)

        # build the anti-graph, the graph of missing edges, weighted by the
        # distance between the two end nodes.
        # points2distance comes from geodataio; presumably a distance between
        # two coordinate pairs in the requested unit - TODO confirm.
        non_edge_list = []
        ag = nx.Graph()
        for ne in nx.non_edges(graph = g):

            # start and end coordinates
            sp = node_dict[ne[0]]
            ep = node_dict[ne[1]]
            curr_dist = points2distance(start = sp,
                                       end = ep,
                                       unit = 'miles')
            ag.add_edge(u_of_edge = ne[0], v_of_edge = ne[1], dist = curr_dist)
            line_string = LineString([sp, ep])
            # record the pair in both directions so every node shows up as a source
            non_edge_list.append([ne[0], ne[1], curr_dist, 'source', line_string])
            non_edge_list.append([ne[1], ne[0], curr_dist, 'target', line_string])

        non_edge_df = pd.DataFrame(data = non_edge_list, columns = ['source', 'target', 'dist', 'desc', 'geometry'])

        # total distance from each node to every node it is not connected to
        source_agg_df = non_edge_df[['source', 'dist']].groupby(['source']).agg(tot_dist = ('dist', 'sum')).reset_index()
        source_agg_df = source_agg_df.sort_values(by = ['tot_dist'], ascending = False)

        # furthest point, the current node: the walk starts at the node with the
        # largest total distance to the others
        curr_node = source_agg_df['source'].iloc[0]

        # start and end points as GeoDataFrames; these are not written to disk
        # and are kept only so they can be inspected after a run
        # NOTE(review): the CRS is hard-coded to EPSG:4326 here and below;
        # confirm that it matches the CRS of gdf.
        sp_df = diss_gdf[['snd_group', 'sp_id', 'sp_coords']].copy()
        ep_df = diss_gdf[['snd_group', 'ep_id', 'ep_coords']].copy()

        sp_gdf = gpd.GeoDataFrame(data = sp_df, geometry = sp_df['sp_coords'].map(lambda x: Point(x)),
                                  crs = 4326).drop(labels = ['sp_coords'], axis = 1)
        ep_gdf = gpd.GeoDataFrame(data = ep_df, geometry = ep_df['ep_coords'].map(lambda x: Point(x)),
                                  crs = 4326).drop(labels = ['ep_coords'], axis = 1)

        # walk the pieces: cross a known piece, then jump to the nearest end
        # node that has not been visited yet, and repeat
        tot_node_order = [curr_node]
        visited_nodes = {curr_node}
        missing_records = []

        # go end-start | end-start
        while len(tot_node_order) < len(g):
            # immediate successor on the known graph: the other end of the piece
            curr_node = next(iter(g[curr_node]))
            tot_node_order.append(curr_node)
            visited_nodes.add(curr_node)

            # distances from here to every end node not visited so far
            candidates = [(edge_data['dist'], dest_node)
                          for dest_node, edge_data in ag[curr_node].items()
                          if dest_node not in visited_nodes]

            if candidates:
                # the nearest unvisited node closes the gap
                curr_dist, dest_node = min(candidates, key = lambda cand: cand[0])
                line_string = LineString([node_dict[curr_node], node_dict[dest_node]])
                missing_records.append([curr_node, dest_node, curr_dist, line_string])
                curr_node = dest_node
                tot_node_order.append(curr_node)
                visited_nodes.add(curr_node)

        # nothing to record if no gap was bridged
        if missing_records:
            miss_seg_df = pd.DataFrame(data = missing_records, columns = ['source', 'dest', 'dist', 'geometry'])
            miss_seg_df['ord_stname'] = sn

            miss_seg_gdf = gpd.GeoDataFrame(data = miss_seg_df,
                                            geometry = miss_seg_df['geometry'],
                                            crs = 4326)

            output_gdf_list.append(miss_seg_gdf)

if len(output_gdf_list) > 0:
    # runs over fewer than 100 street names are treated as trial runs and are
    # written to 'temp_' files so they do not overwrite the full output
    file_prefix = 'temp_' if len(s_names) < 100 else ''

    miss_seg_gdf = pd.concat(output_gdf_list)
    # write this out
    output_file_name = file_prefix + 'miss_seg.gpkg'
    ofpn = os.path.join(file_path, output_file_name)
    miss_seg_gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

    # NOTE(review): the two exports below only run when at least one missing
    # segment was found, because they sit inside this if-block; confirm that
    # this nesting is intended.

    # the exploded segments
    if len(ex_gdf_list):
        ex_gdf = pd.concat(ex_gdf_list)
        print(ex_gdf.head())
        # write this out
        output_file_name = file_prefix + 'exploded_street_segments.gpkg'
        ofpn = os.path.join(file_path, output_file_name)
        ex_gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

    # the extra exploded segments
    if len(ex_ex_gdf_list):
        ex_ex_gdf = pd.concat(ex_ex_gdf_list)
        # write this out
        output_file_name = file_prefix + 'extra_exploded_street_segments.gpkg'
        ofpn = os.path.join(file_path, output_file_name)
        ex_ex_gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

BEACON AVE S
  ord_street_st_type ord_street ord_st_type    ord_stname  snd_group  \
0         BEACON AVE     BEACON         AVE  BEACON AVE S          1   
1         BEACON AVE     BEACON         AVE  BEACON AVE S          2   
2         BEACON AVE     BEACON         AVE  BEACON AVE S          3   
3         BEACON AVE     BEACON         AVE  BEACON AVE S          4   
4         BEACON AVE     BEACON         AVE  BEACON AVE S          5   

                                            geometry   geom_type  
0  LINESTRING (-122.27016 47.50448, -122.27002 47...  LineString  
1  LINESTRING (-122.27981 47.52068, -122.27877 47...  LineString  
2  LINESTRING (-122.28067 47.52076, -122.28066 47...  LineString  
3  LINESTRING (-122.31908 47.58571, -122.31898 47...  LineString  
4  LINESTRING (-122.30513 47.56194, -122.30516 47...  LineString  
