In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?

In [2]:
# standard
import os

In [41]:
# external
from itertools import combinations, product
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString
from shapely import line_merge

In [4]:
# custom
from geodataio.geo_operations import points2distance, calculate_initial_compass_bearing

# load the shapefile as a gpd

In [5]:
# file path
file_path = 'H:/project/seattle_streets/data/' 

In [6]:
file_name = 'Street_Network_Database.gpkg'

In [7]:
fpn = os.path.join(file_path, file_name)

In [8]:
gdf = gpd.read_file(filename = fpn)

In [9]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry'],
      dtype='object')

In [10]:
# import the classified streets

In [11]:
file_name = 'blank_street_type_modified.xlsx'

In [12]:
fpn = os.path.join(file_path, file_name)

In [13]:
blank_street_type_df = pd.read_excel(io = fpn)

In [14]:
blank_street_type_df.head()

Unnamed: 0,ord_stname,ord_st_type
0,I5 SB,IS
1,BROADWAY E,ST
2,NW ESPLANADE,ST
3,FAUNTLEE CREST SW,ST
4,SR518,SR


In [15]:
gdf['ord_street_type'].unique()

array(['ST', 'AVE', 'CT', 'PL', 'DR', 'WAY', 'BLVD', 'LN', 'TRL', 'RD',
       'CIR', None, 'PKWY', 'TER', 'AL', 'VI', 'OP', 'RP', 'BR', 'LOOP',
       'HWY', 'ET', 'VIEW', 'PZ', 'MALL'], dtype=object)

In [16]:
# attach the manually classified street types to the street segments.
# the join key is spelled out: without `on`, pandas joins on every column name
# the two frames happen to share, so an extra column in the spreadsheet
# would silently change which rows match.
gdf = pd.merge(left = gdf, right = blank_street_type_df, how = 'left', on = 'ord_stname')

In [17]:
# segments without a manually classified type fall back to the type
# recorded in the street network database
gdf['ord_st_type'] = gdf['ord_st_type'].fillna(gdf['ord_street_type'])

In [18]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_length', 'l_adrs_from', 'l_adrs_to', 'r_adrs_from',
       'r_adrs_to', 'ord_pre_dir', 'ord_street', 'ord_street_type',
       'ord_suf_dir', 'ord_stname', 'l_city', 'l_state', 'l_zip', 'r_city',
       'r_state', 'r_zip', 'sndseg_upd', 'compkey', 'comptype', 'unitid',
       'unitid2', 'city_portion', 'geometry', 'ord_st_type'],
      dtype='object')

In [19]:
# PASS 1: FOCUS ON JUST THE STREETS, REGARDLESS OF DIRECTION AND TYPE

In [20]:
gdf['ord_street_st_type'] = gdf['ord_street'] + ' ' + gdf['ord_st_type']

In [21]:
agg_df = gdf[['ord_street_st_type']].drop_duplicates().copy()

In [22]:
agg_df.head()

Unnamed: 0,ord_street_st_type
0,STEVENS ST
1,37TH ST
2,110TH ST
3,106TH ST
4,56TH AVE


In [23]:
s_names = agg_df['ord_street_st_type'].tolist()

In [24]:
# NOTE(review): this replaces the full list of street names built in the
# previous cell with a single street, so every loop over `s_names` below
# only processes this one street. Presumably a debugging shortcut - confirm
# before running the analysis on all streets.
s_names = ['GALER ST']

In [25]:
output_list = []
output_gdf_list = []
for sn in s_names:
    # get a street by name and type
    print(sn)
    temp_gdf = gdf.loc[gdf['ord_street_st_type'] == sn, :].copy()
    temp_gdf['snd_group'] = int(0)

    # intersections are the nodes, street segments are the edges
    g = nx.from_pandas_edgelist(df = temp_gdf, source = 'f_intr_id', target = 't_intr_id')
    n_connected_components = nx.number_connected_components(g)

    # label each segment with the connected component it belongs to.
    # the label is assigned through the segment's from-intersection rather than
    # through the snd_id stored on the graph edge: an undirected Graph keeps only
    # one edge per pair of nodes, so two segments joining the same two
    # intersections would share a single edge and one of them would never be
    # relabeled (it would stay in the default group 0).
    # both endpoints of a segment are always in the same component, so the
    # from-intersection alone identifies the component.
    for snd_group_count, component_nodes in enumerate(nx.connected_components(g)):
        in_component = temp_gdf['f_intr_id'].isin(component_nodes)
        temp_gdf.loc[in_component, 'snd_group'] = snd_group_count

    # now, dissolve by the ord_street_st_type and snd_group
    diss_gdf = temp_gdf[['ord_street', 'ord_st_type', 'snd_group', 'geometry']].dissolve(
        by = ['ord_street', 'ord_st_type', 'snd_group'],
        as_index = False)
    # create single linestrings!
    diss_gdf['geometry'] = diss_gdf['geometry'].map(line_merge)
    diss_gdf['geom_type'] = diss_gdf['geometry'].map(lambda x: x.geom_type)

    # NOTE(review): diss_gdf is overwritten on every iteration, so only the last
    # street in s_names survives the loop - nothing is collected yet.
    # let's split and then recombine
    # output_gdf_list.append(temp_gdf)
    # dissolve the geometry
        
        

GALER ST


In [26]:
diss_gdf.head()

Unnamed: 0,ord_street,ord_st_type,snd_group,geometry,geom_type
0,GALER,ST,0,"LINESTRING (-122.37169 47.63231, -122.37108 47...",LineString
1,GALER,ST,1,"LINESTRING (-122.32607 47.63226, -122.32496 47...",LineString
2,GALER,ST,2,"LINESTRING (-122.31250 47.63233, -122.31192 47...",LineString
3,GALER,ST,3,"LINESTRING (-122.28478 47.63218, -122.28407 47...",LineString
4,GALER,ST,4,"LINESTRING (-122.32021 47.63228, -122.31915 47...",LineString


In [34]:
# we can't assume that start and end point are ordered in any way.
# so we need to do all product pairs

In [27]:
# record the first and last vertex of every dissolved line, keyed by the line's
# row position in diss_gdf; these are the points used to measure the gaps
# between the separate pieces of the street
start_point_dict = dict(enumerate(geom.coords[0] for geom in diss_gdf['geometry']))
end_point_dict = dict(enumerate(geom.coords[-1] for geom in diss_gdf['geometry']))

In [36]:
from itertools import combinations, product

In [42]:
my_point = start_point_dict[0]

In [43]:
my_point

(-122.37169355299994, 47.632312513000045)

In [46]:
# distance (in miles) from the start point of each line to the end point of
# every *other* line; a line is never paired with itself
output_list = [
    [sp_id,
     ep_id,
     points2distance(start = start_point_dict[sp_id],
                     end = end_point_dict[ep_id],
                     unit = 'miles')]
    for sp_id, ep_id in product(sorted(start_point_dict), sorted(end_point_dict))
    if sp_id != ep_id
]
        
    

In [47]:
dist_df = pd.DataFrame(data = output_list, columns = ['sp_id', 'ep_id', 'dist'])

In [48]:
dist_df.head()

Unnamed: 0,sp_id,ep_id,dist
0,0,1,2.176284
1,0,2,3.406419
2,0,3,4.337022
3,0,4,2.471104
4,0,5,0.219484


In [54]:
# take the min distance by start point and end point
def compute_linestring(row):
    """Build the straight two-point line for one start/end pairing.

    Parameters
    ----------
    row : mapping with 'sp_id' and 'ep_id' entries
        'sp_id' is a key of the notebook-level start_point_dict and
        'ep_id' is a key of the notebook-level end_point_dict.

    Returns
    -------
    LineString running from the looked-up start point to the looked-up end point.
    """
    # NOTE(review): when called through DataFrame.apply(axis = 1) the ids
    # presumably arrive as floats because the row also holds the float distance;
    # the dictionary lookups still work because 1.0 and 1 hash and compare equal.
    start_xy = start_point_dict[row['sp_id']]
    end_xy = end_point_dict[row['ep_id']]
    return LineString([start_xy, end_xy])

In [55]:
geoms = dist_df.apply(compute_linestring, axis = 1)

In [56]:
dist_gdf = gpd.GeoDataFrame(data = dist_df, geometry = geoms, crs = 4326)

In [57]:
output_file_name = 'test_distance.gpkg'

In [58]:
ofpn = os.path.join(file_path, output_file_name)

In [59]:
dist_gdf.to_file(filename = ofpn, driver = 'GPKG')

In [60]:
# 2024 07 21: 1829
# i need to look at each segment and each start point and end point in each segment
# couldn't I use an rtree to build the point cloud and then return the nearest start/end point?
# yes, that would be the easiest.



In [40]:
testo

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 5),
 (0, 6),
 (0, 7),
 (0, 8),
 (0, 9),
 (0, 10),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (1, 8),
 (1, 9),
 (1, 10),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (2, 8),
 (2, 9),
 (2, 10),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (3, 10),
 (4, 0),
 (4, 1),
 (4, 2),
 (4, 3),
 (4, 4),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (4, 10),
 (5, 0),
 (5, 1),
 (5, 2),
 (5, 3),
 (5, 4),
 (5, 5),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (5, 10),
 (6, 0),
 (6, 1),
 (6, 2),
 (6, 3),
 (6, 4),
 (6, 5),
 (6, 6),
 (6, 7),
 (6, 8),
 (6, 9),
 (6, 10),
 (7, 0),
 (7, 1),
 (7, 2),
 (7, 3),
 (7, 4),
 (7, 5),
 (7, 6),
 (7, 7),
 (7, 8),
 (7, 9),
 (7, 10),
 (8, 0),
 (8, 1),
 (8, 2),
 (8, 3),
 (8, 4),
 (8, 5),
 (8, 6),
 (8, 7),
 (8, 8),
 (8, 9),
 (8, 10),
 (9, 0),
 (9, 1),
 (9, 2),
 (9, 3),
 (9, 4),
 (9, 5),
 (9, 6),
 (9, 7),
 (9, 8),
 (9, 9),
 (9, 10),


In [33]:
sorted(start_point_dict.keys())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [None]:
# place this stuff into a dictionary
# TODO: this cell was never finished. it previously held only the bare name
# `end`, which raises a NameError and stops a Restart & Run All at this point.

In [None]:
# now we need to compute the combination of the end points




In [None]:
output_file_name = 'galer.gpkg'

In [None]:
ofpn = os.path.join(file_path, output_file_name)

In [None]:
diss_gdf.to_file(filename = ofpn, driver = 'GPKG', layer = 'testo')

In [None]:
temp_gdf.shape

In [None]:
diss_gdf.head()

In [None]:
# let's count the parts
testo = diss_gdf.explode(index_parts = True)

In [None]:
mygeom = diss_gdf['geometry'].iloc[0]

In [None]:
from shapely.ops import linemerge

In [None]:
# use shapely.line_merge (imported at the top of the notebook), the same call
# used to build diss_gdf. shapely.ops.linemerge only accepts a MultiLineString
# or a sequence of lines and raises ValueError for a single LineString, which
# is what diss_gdf holds once its geometries have been merged.
outcome = line_merge(mygeom)

In [None]:
outcome

In [None]:
output_list = []
output_gdf_list = []
for sn in s_names:
    print(sn)
    # s_names holds 'ord_street_st_type' values (street name plus type, e.g.
    # 'GALER ST'), so the filter has to use that column; comparing against
    # 'ord_street' alone never matches and every street comes back empty.
    street_gdf = gdf.loc[gdf['ord_street_st_type'] == sn, ['ord_street', 'ord_st_type', 'geometry']]

    # one (multi-part) geometry per street name and type ...
    temp_gdf = street_gdf.dissolve(by = ['ord_street', 'ord_st_type'], as_index = False)
    # ... then one row per part, keeping the street id and the part id as columns
    temp_gdf = temp_gdf.explode(ignore_index = False, index_parts = True)
    temp_gdf = temp_gdf.reset_index(names = ['ord_street_id', 'ord_street_part_id'])

    # let's split and then recombine
    output_gdf_list.append(temp_gdf)
        # dissolve the geometry
        
        

In [None]:
output_gdf = pd.concat(output_gdf_list)

In [None]:
output_gdf.head()

In [None]:
output_gdf.shape

In [None]:
# let's put stuff in a directed graph

In [None]:
# so, now we need to find the distance betwee the start and stop ids

In [None]:
# group to get counts
agg_df = output_gdf['ord_street_id'].value_counts().to_frame()




In [None]:
agg_df.head()

In [None]:
# count the connected pieces of every street and keep the dissolved geometry
# of the streets that are made of more than one piece.
# NOTE(review): this cell filters on 'ord_stname', so s_names must hold
# 'ord_stname' values here; earlier cells fill s_names from
# 'ord_street_st_type' - confirm which list is intended before running.
output_list = []
output_gdf_list = []
for sn in s_names:
    print(sn)
    # the intersection ids are needed to build the graph; the column name was
    # previously misspelled as 'ord_strname', which raises a KeyError
    temp_gdf = gdf.loc[gdf['ord_stname'] == sn, ['f_intr_id', 't_intr_id', 'ord_stname', 'geometry']].copy()

    # rebuild the graph for *this* street; with this line commented out the loop
    # reused whatever graph `g` was left over from an earlier cell, so every
    # street reported the same number of components
    g = nx.from_pandas_edgelist(df = temp_gdf, source = 'f_intr_id', target = 't_intr_id')
    n_connected_components = nx.number_connected_components(g)
    output_list.append([sn, n_connected_components])

    if n_connected_components > 1:
        # dissolve this lovely: one multi-part geometry per street name
        diss_gdf = temp_gdf[['ord_stname', 'geometry']].dissolve(by = 'ord_stname')
        diss_gdf['n_parts'] = n_connected_components
        output_gdf_list.append(diss_gdf)
        # dissolve the geometry
        
        

In [None]:
con_df = pd.DataFrame(data = output_list, columns = ['ord_stname', 'n_parts'])

In [None]:
con_df = con_df.sort_values(by = ['n_parts'], ascending = False)

In [None]:
con_df.head()

In [None]:
con_df['n_parts'].describe()

In [None]:
my_gdf = pd.concat(objs = output_gdf_list, axis = 0)

In [None]:
my_gdf.shape

In [None]:
output_file_path = 'H:/project/seattle_streets/data/'

In [None]:
output_file_name = 'multi_part_streets.gpkg'

In [None]:
ofpn = os.path.join(output_file_path, output_file_name)

In [None]:
my_gdf.to_file(filename = ofpn, driver = 'GPKG')