## Part 02: Export nodes and select working streets
michael babb  
2024 11 24

In [1]:
# standard
import os

In [2]:
# external
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely

In [3]:
# custom
import run_constants as rc
from utils import generate_street_end_vertices, write_gdf

# load the street network

In [4]:
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_SND_IN_FILE_NAME)

In [5]:
gdf = gpd.read_file(filename = fpn)

# generate street end vertices - these will be helpful with identification in subsequent steps

In [6]:
# can we generate the vertices? yes... but let's check what type of geometry we are working with
# use the vectorized geom_type accessor: it avoids a per-row Python lambda and
# does not raise on a missing geometry
gdf['geometry'].geom_type.value_counts()

geometry
MultiLineString    34378
Name: count, dtype: int64

In [7]:
# everything is a MultiLineString??? is that necessary?
# we can call the gdf.explode() function to convert MultiLineStrings to LineStrings
# if the record counts are the same, we can convert everything to LineStrings 

In [8]:
# explode multi-part geometries into one row per part; kept in a separate
# frame so the record count can be compared against gdf before replacing it
test_gdf = gdf.explode()

In [9]:
test_gdf.shape

(34378, 38)

In [10]:
# nope! If MultiLineStrings were necessary, the number of records would increase.

In [11]:
# only adopt the exploded frame when explode() left the record count unchanged,
# i.e. no MultiLineString held more than one part; fail loudly otherwise so a
# Run All on new data cannot silently duplicate street records
assert len(test_gdf) == len(gdf), 'explode() changed the record count: MultiLineStrings are necessary'
gdf = test_gdf.copy()

In [12]:
gdf['geometry'].geom_type.value_counts()

LineString    34378
Name: count, dtype: int64

In [13]:
# the helper returns the street gdf together with a second gdf of nodes
# NOTE(review): presumably node_gdf holds the end vertices of each street
# segment - confirm against utils.generate_street_end_vertices
gdf, node_gdf = generate_street_end_vertices(gdf = gdf)

In [14]:
# hand the node gdf to the project's write_gdf helper
write_gdf(
    gdf = node_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_NODE_OUT_FILE_NAME,
)

# import the classified streets - this supplies a street type for streets that have none.

In [15]:
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_BST_IN_FILE_NAME)

In [16]:
blank_street_type_df = pd.read_excel(io = fpn)

In [17]:
blank_street_type_df.head()

Unnamed: 0,ord_stname_concat,ord_street_type_fix
0,NE SUNRISE VISTA,ST
1,NW ESPLANADE,ST
2,BROADWAY E,ST
3,FAUNTLEE CREST SW,ST
4,NE FOREST VISTA,ST


In [18]:
# attach the street-type fixes to the network by street name.
# the join key is spelled out instead of relying on "all shared columns", and
# validate guards against duplicate names in the fix table, which would
# otherwise silently duplicate street segments in this left join
gdf = pd.merge(left = gdf, right = blank_street_type_df, how = 'left',
               on = 'ord_stname_concat', validate = 'many_to_one')

In [19]:
# where the street type is missing, take the value from the fix column
gdf['ord_street_type'] = gdf['ord_street_type'].fillna(gdf['ord_street_type_fix'])

In [20]:
gdf['ord_street_type'].unique()

array(['AVE', 'CT', 'ST', 'PL', 'LN', 'RD', 'DR', 'WAY', 'PKWY', 'BLVD',
       'CIR', 'TER', 'AL', 'RP', 'LOOP', 'SR', 'IS', 'OP', 'TRL', 'RR',
       'BR', 'WKWY', 'ET', 'STCR', 'VIEW', 'FLYOVER', 'HWY', 'RN', 'PZ',
       'MALL'], dtype=object)

In [21]:
# the fix column has been folded into ord_street_type and is no longer needed
gdf = gdf.drop(columns = ['ord_street_type_fix'])

# keep only streets in Seattle

In [22]:
# a segment is kept when either side of the street is in Seattle
left_in_seattle = gdf['l_city'] == 'SEATTLE'
right_in_seattle = gdf['r_city'] == 'SEATTLE'
gdf = gdf.loc[left_in_seattle | right_in_seattle, :].copy()

# write the full seattle streets to disk

In [23]:
# hand the Seattle-only street gdf to the project's write_gdf helper
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_FULL_OUT_FILE_NAME,
)

# export the individual street portions to make the different sections

In [24]:
# after performing some analysis in GIS, we can quickly identify the central streets

In [25]:
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_CENTRAL_STREETS_IN_FILE_NAME)

In [26]:
cs_df = pd.read_csv(filepath_or_buffer=fpn)

In [27]:
# the fid column is not used below
cs_df = cs_df.drop(columns = ['fid'])

In [28]:
# working_gdf (wgdf)
# narrow the streets down to the ones used to outline each city portion:
# feature codes 1 and 5, street code 0, and every city portion except 'WB'
street_filter = "snd_feacode in (1, 5) and st_code in (0, ) and city_portion != 'WB'"
wgdf = gdf.query(expr = street_filter)

# draw a convex hull around each group of city streets

In [29]:
# build one convex hull per city portion: data_list collects the attribute
# rows and geom_list the matching hull geometries, in the same order
data_list = []
geom_list = []
for portion in wgdf['city_portion'].unique().tolist():
    print(portion)
    portion_mask = wgdf['city_portion'] == portion
    if portion == 'CNTR':
        # the central portion is restricted to the streets listed in cs_df
        portion_mask = wgdf['snd_id'].isin(cs_df['snd_id']) & portion_mask
    portion_lines = wgdf.loc[portion_mask, 'geometry']

    # merge the segments into a single geometry, then take its convex hull
    merged_lines = shapely.unary_union(geometries = portion_lines)
    data_list.append([portion, 'convex'])
    geom_list.append(shapely.convex_hull(geometry = merged_lines))

S
E
NW
N
CNTR
NE
SW
W


In [30]:
# the hulls are built from wgdf geometries, so label them with wgdf's CRS
# instead of a hard-coded value; fall back to the previous hard-coded CRS
# when the streets carry no CRS metadata
hull_crs = wgdf.crs if wgdf.crs is not None else 'epsg:4326'
output_gdf = gpd.GeoDataFrame(data = data_list,
                              columns = ['city_portion', 'hull_type'],
                              geometry = geom_list,
                              crs = hull_crs)

In [31]:
# hand the city-portion hulls to the project's write_gdf helper
write_gdf(
    gdf = output_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_CITY_SECTIONS_OUT_FILE_NAME,
)

# keep only streets with segment_type == 1, these are actual streets

In [32]:
# segment_type 1 marks the actual streets
is_street = gdf['segment_type'] == 1
gdf = gdf.loc[is_street, :].copy()

# remove the following street types:

AL: alley  
TRL: trail  
OP: overpass  
IS: interstate  
SR: state route  
RR: rail  
FLYOVER: flyover  
STCR: streetcar  
ET: extension  
RN: turn  
RP: highway ramps  
WKWY: walkways  

In [33]:
# street types to drop from the working network
street_type_to_remove = [
    'AL',       # alley
    'TRL',      # trail
    'OP',       # overpass
    'IS',       # interstate
    'SR',       # state route
    'RR',       # rail
    'FLYOVER',  # flyover
    'STCR',     # streetcar
    'ET',       # extension
    'RN',       # turn
    'RP',       # highway ramps
    'WKWY',     # walkways
]

In [34]:
# before...
gdf.shape

(25789, 38)

In [35]:
# what street types are left?
gdf['ord_street_type'].unique()

array(['AVE', 'ST', 'PL', 'LN', 'RD', 'WAY', 'PKWY', 'DR', 'CT', 'BLVD',
       'CIR', 'TER', 'OP', 'ET', 'BR', 'VIEW', 'WKWY', 'FLYOVER', 'RN',
       'TRL', 'LOOP', 'SR', 'PZ'], dtype=object)

In [36]:
# remove...
# `~` is the logical NOT for a boolean mask; unary minus only works on
# boolean data through a pandas special case
gdf = gdf.loc[~gdf['ord_street_type'].isin(street_type_to_remove), :].copy()

In [37]:
# after..
gdf.shape

(25774, 38)

In [38]:
# what street types are left?
gdf['ord_street_type'].unique()

array(['AVE', 'ST', 'PL', 'LN', 'RD', 'WAY', 'PKWY', 'DR', 'CT', 'BLVD',
       'CIR', 'TER', 'BR', 'VIEW', 'LOOP', 'PZ'], dtype=object)

In [39]:
# before...
gdf.shape

(25774, 38)

In [40]:
# load the list of erroneous segments (identified by snd_id) that are removed below
# NOTE(review): this path is hard-coded, unlike the other inputs, which are
# built from run_constants - consider moving it there
err_segments = pd.read_csv(filepath_or_buffer='../data/streets_to_remove.txt')


In [41]:
# examine the erroneous segments: pull the flagged snd_ids into their own
# frame and summarize their lengths
err_mask = gdf['snd_id'].isin(err_segments['snd_id'])
tgdf = gdf.loc[err_mask, :].copy()
tgdf['gis_seg_length'].describe()
# very short!

count     1.000000
mean     23.369271
std            NaN
min      23.369271
25%      23.369271
50%      23.369271
75%      23.369271
max      23.369271
Name: gis_seg_length, dtype: float64

In [42]:
# check names...
tgdf['ord_stname_concat'].unique().tolist()

['15TH AVE W']

In [43]:
tgdf['segment_type'].unique()

array([1], dtype=int64)

In [45]:
tgdf.head()

Unnamed: 0,f_intr_id,t_intr_id,snd_id,snd_feacode,citycode,stname_id,st_code,arterial_code,segment_type,agency_code,...,r_city,r_state,r_zip,sndseg_update,compkey,comptype,unitid,unitid2,city_portion,geometry
18097,46825,8049,47787,1,1,65,40,0,1,3,...,SEATTLE,WA,98119,2009-05-22 00:00:00+00:00,0,0,,,W,"LINESTRING (-122.37617 47.6595, -122.37624 47...."


In [46]:
# remove the erroneous segments
# `~` is the logical NOT for a boolean mask; unary minus only works on
# boolean data through a pandas special case
gdf = gdf.loc[~gdf['snd_id'].isin(err_segments['snd_id']), :].copy()

In [47]:
# after
gdf.shape

(25773, 38)

In [48]:
# unique roads
gdf['ord_stname_concat'].unique().shape

(2455,)

In [49]:
# total miles: 5280 feet per mile
# NOTE(review): presumably gis_seg_length is stored in feet - confirm
total_seg_length = gdf['gis_seg_length'].sum()
total_seg_length / 5280

1892.5011545316283

# save as a gdf - overwrite

In [50]:
# hand the filtered working streets to the project's write_gdf helper
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_WORKING_OUT_FILE_NAME,
)