## Part 02: Export nodes and select working streets
michael babb  
2024 11 24

In [1]:
# standard
import os
import collections

In [2]:
# external
from concave_hull import concave_hull
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely

In [3]:
# custom
import run_constants as rc
from utils import create_name, generate_street_end_vertices, write_gdf, write_json

# load the street network

In [4]:
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_SND_IN_FILE_NAME)

In [5]:
gdf = gpd.read_file(filename = fpn)

In [6]:
gdf.shape

(34378, 39)

In [7]:
gdf['ord_stname_unique'].unique().shape

(3696,)

# generate street end vertices - these will be helpful with identification in subsequent steps

In [8]:
# can we generate the vertices? yes... but let's check what type of geometry we are working with
# use the vectorized GeoSeries.geom_type accessor instead of mapping a lambda over
# every geometry; this is the same form used for the re-check after explode().
gdf['geometry'].geom_type.value_counts()

geometry
MultiLineString    34378
Name: count, dtype: int64

In [9]:
# everything is a MultiLineString??? is that necessary?
# we can call the gdf.explode() function to convert MultiLineStrings to LineStrings:
# each part of a multi-part geometry becomes its own row.
# if the record counts are the same, every feature has exactly one part and
# we can convert everything to LineStrings.
# NOTE(review): explode() is called with its defaults; the default for index_parts
# differs between geopandas versions, so the resulting index may be a MultiIndex - confirm.
test_gdf = gdf.explode()

In [10]:
test_gdf.shape

(34378, 39)

In [11]:
# MultiLineStrings are not necessary: the number of records is unchanged after explode(),
# so every feature was a single-part line. keep the exploded (LineString) version.
gdf = test_gdf.copy()

In [12]:
gdf['geometry'].geom_type.value_counts()

LineString    34378
Name: count, dtype: int64

In [13]:
# generate_street_end_vertices (from utils) returns two frames: the first replaces gdf,
# the second (node_gdf) is written to disk in the next step.
# NOTE(review): what the helper adds to or changes in gdf is not visible here - check utils.
gdf, node_gdf = generate_street_end_vertices(gdf = gdf)

In [14]:
# save the street-end nodes to the output folder
write_gdf(
    gdf = node_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_NODE_OUT_FILE_NAME,
)

# import the manually classified streets to update streets without a classification. 

In [15]:
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_BST_IN_FILE_NAME)

In [16]:
blank_street_type_df = pd.read_excel(io = fpn)

In [17]:
blank_street_type_df.head()

Unnamed: 0,ord_stname_concat,ord_street_type_fix
0,17TH AVE NW WKWY,WKWY
1,21ST AV SW TURN,RN
2,59TH AV SW TURN,ST
3,AIRPORT FY NB,SR
4,AIRPORT FY SB,SR


In [18]:
blank_street_type_df.shape

(80, 2)

In [19]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion',
       'ord_stname_unique', 'geometry'],
      dtype='object')

In [20]:
# join the manually classified street types onto the segments by street name.
# `on` is spelled out so the join key cannot silently change if the two tables ever
# share another column name; `validate` raises if a street name has more than one
# fix row, which would otherwise duplicate street segments.
# `indicator` adds the '_merge' column that is checked in the next cell.
test_join = pd.merge(left = gdf, right = blank_street_type_df,
                     on = 'ord_stname_concat', how = 'outer',
                     validate = 'many_to_one', indicator = True)

In [21]:
test_join['_merge'].value_counts()

_merge
left_only     33662
both            716
right_only        0
Name: count, dtype: int64

In [22]:
gdf = test_join.copy()

In [23]:
# fill missing street types from the manual fixes; rows that already have a
# street type are left untouched
gdf['ord_street_type'] = gdf['ord_street_type'].fillna(value = gdf['ord_street_type_fix'])

In [24]:
gdf['ord_street_type'].unique()

array(['RP', 'AVE', 'LN', 'PL', 'TER', 'RD', 'CT', 'BR', 'WKWY', 'TRL',
       'RN', 'ST', 'ET', 'DR', 'CIR', 'PKWY', 'SR', 'WAY', 'AL', 'RR',
       'BLVD', 'OP', 'STCR', 'IS', 'MALL', 'PZ', 'LOOP', 'FLYOVER',
       'VIEW', 'HWY'], dtype=object)

In [25]:
gdf['ord_street_type'].isna().value_counts()

ord_street_type
False    34378
Name: count, dtype: int64

In [26]:
# the fix column and the merge indicator have served their purpose
helper_col_names = ['ord_street_type_fix', '_merge']
gdf = gdf.drop(columns = helper_col_names)

# keep only streets in Seattle

In [27]:
# a segment is kept when the city on its left OR right side is Seattle
in_seattle = gdf[['l_city', 'r_city']].eq('SEATTLE').any(axis = 1)
gdf = gdf.loc[in_seattle, :].copy()

In [28]:
gdf.shape

(27891, 39)

In [29]:
gdf['ord_stname_concat'].unique().shape

(2795,)

# write the full seattle streets to disk

In [30]:
# all Seattle segments, before any segment-type or street-type filtering
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_FULL_OUT_FILE_NAME,
)

# keep only streets with segment_type == 1  
These are actual streets.  
Everything else is not a street (rail, trail, staircase, etc.).  
`segment_type == 15` is a stub, a dangling piece of a street, usually around 50 feet or less.  
Very short! There are 362 of them.

In [31]:
gdf['segment_type'].value_counts()

segment_type
1     25789
15      362
7       350
6       332
4       215
3       205
8       203
9       175
13      126
2        49
5        45
11       33
10        5
12        1
14        1
Name: count, dtype: int64

In [32]:
# before...
gdf.shape

(27891, 39)

In [33]:
# keep only the actual streets (segment_type 1)
is_street = gdf['segment_type'].eq(1)
gdf = gdf.loc[is_street, :].copy()
# after...
gdf.shape

(25789, 39)

In [34]:
gdf['ord_stname_unique'].unique().shape

(2462,)

# update the city portion variable

In [35]:
# the central streets were identified by performing some analysis in QGIS;
# the result of that analysis is read here as a table of snd_id values with
# their replacement city portion.
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_CENTRAL_STREETS_IN_FILE_NAME)
cs_df = pd.read_csv(filepath_or_buffer=fpn)


In [36]:
# check
cs_df.head()

Unnamed: 0,snd_id,temp_city_portion
0,11524,CNTR
1,16184,CNTR
2,16185,CNTR
3,16187,CNTR
4,16289,CNTR


In [37]:
# join the central-street flags onto the segments by segment id.
# `on` makes the join key explicit instead of relying on whichever column names the
# two tables happen to share; `validate` raises if an snd_id is listed more than
# once in cs_df, which would otherwise duplicate street segments.
test_join = pd.merge(left = gdf, right = cs_df, on = 'snd_id', how = 'outer',
                     validate = 'many_to_one', indicator = True)
# 'both' = segments flagged as central; 'right_only' should be zero
test_join['_merge'].value_counts()

_merge
left_only     24082
both           1707
right_only        0
Name: count, dtype: int64

In [38]:
test_join['ord_stname_unique'].unique().shape

(2462,)

In [39]:
# one row per distinct (unique street name, city portion) pair
check_df = test_join[['ord_stname_unique', 'city_portion']].drop_duplicates()

In [40]:
# tally how many rows of check_df carry each unique street name.
# NOTE(review): my_counter is not inspected anywhere else in this notebook - either
# display the names with a count above 1 or remove this check.
my_counter = collections.Counter(check_df['ord_stname_unique'])

In [41]:
test_join.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion',
       'ord_stname_unique', 'geometry', 'temp_city_portion', '_merge'],
      dtype='object')

In [42]:
# segments found in the central-streets table take their city portion from it
is_central = test_join['_merge'] == 'both'
test_join.loc[is_central, 'city_portion'] = test_join.loc[is_central, 'temp_city_portion']

In [43]:
# recompute ord_stname_unique for every row (create_name is called once per row)
# after city_portion was overwritten for the central streets.
# NOTE(review): presumably create_name builds the name from columns that include
# city_portion - confirm in utils.
test_join['ord_stname_unique'] = test_join.apply(func = create_name, axis = 1)

In [44]:
# remove the join helper columns and make the result the working frame
drop_col_names = ['temp_city_portion', '_merge']
gdf = test_join.drop(columns = drop_col_names).copy()

In [45]:
# debugging export: test_join still carries 'temp_city_portion' and '_merge', so the
# city_portion overrides can be inspected in a GIS. this file is not read again in
# this notebook. the leftover scratch file name was replaced with a descriptive one.
write_gdf(gdf = test_join, output_file_path = rc.OUTPUT_FILE_PATH,
          output_file_name = 's02_city_portion_join_check.gpkg')

# draw a convex hull and a concave hull around each group of city streets

In [46]:
# two parallel lists: data_list holds [city_portion, hull_type] rows and
# geom_list holds the matching hull geometry at the same position
data_list = []
geom_list = []
for cp in gdf['city_portion'].unique().tolist():
    print(cp)
    portion_lines = gdf.loc[gdf['city_portion'] == cp, 'geometry']

    # convex hull: merge every segment of this portion into one geometry, then wrap it
    merged_lines = shapely.unary_union(geometries = portion_lines)
    convex_poly = shapely.convex_hull(geometry = merged_lines)
    data_list.append([cp, 'convex'])
    geom_list.append(convex_poly)

    # concave hull: built from every vertex of every segment in this portion
    # (.coords is read from each geometry directly, so each one must be a single-part line)
    vertex_list = [xy for line in portion_lines for xy in line.coords]
    hull_ring = concave_hull(points = vertex_list)
    concave_poly = shapely.Polygon(hull_ring)
    data_list.append([cp, 'concave'])
    geom_list.append(concave_poly)

NW
N
NE
CNTR
E
W
S
SW


In [47]:
# one row per (city portion, hull type); the hulls are labelled as EPSG:4326
# NOTE(review): the CRS is hard-coded rather than taken from gdf - confirm they match
output_gdf = gpd.GeoDataFrame(
    data = data_list,
    columns = ['city_portion', 'hull_type'],
    geometry = geom_list,
    crs = 'epsg:4326',
)

In [48]:
# save the hull polygons
write_gdf(
    gdf = output_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_CITY_SECTIONS_OUT_FILE_NAME,
)

In [49]:
# let's export this to a LineString file as well
# .boundary replaces each geometry with its boundary. output_gdf is overwritten in
# place, so every later use of output_gdf (including the GeoJSON export further
# down) receives the boundaries, not the original hull geometries.
output_gdf['geometry'] = output_gdf['geometry'].boundary

In [50]:
# save the hull outlines
write_gdf(
    gdf = output_gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_CITY_SECTIONS_LINES_OUT_FILE_NAME,
)

In [51]:
output_gdf.head()

Unnamed: 0,city_portion,hull_type,geometry
0,NW,convex,"LINESTRING (-122.35768 47.65214, -122.35784 47..."
1,NW,concave,"LINESTRING (-122.40637 47.67297, -122.40637 47..."
2,N,convex,"LINESTRING (-122.33299 47.61852, -122.33427 47..."
3,N,concave,"LINESTRING (-122.3577 47.64778, -122.3577 47.6..."
4,NE,convex,"LINESTRING (-122.3046 47.64749, -122.31007 47...."


In [53]:
# keep only the concave hulls for the map export.
# .copy() makes this an independent frame, so the edits below do not act on a slice
# of output_gdf (avoids SettingWithCopyWarning and matches the other filters in this
# notebook, which all end in .copy()).
temp_output_gdf = output_gdf.loc[output_gdf['hull_type'] == 'concave', ['city_portion', 'geometry']].copy()
# shorten the column name and the central label for the exported file
temp_output_gdf = temp_output_gdf.rename(columns = {'city_portion': 'cp'})
temp_output_gdf['cp'] = temp_output_gdf['cp'].str.replace('CNTR', 'C')
# serialize to a GeoJSON string without feature ids, in WGS84 coordinates
cs_json = temp_output_gdf.to_json(drop_id=True, to_wgs84=True)


In [54]:
# let's save this as a GeoJSON
# NOTE(review): the output folder and file name are hard-coded here, unlike the
# rc.* constants used for the other outputs in this notebook.
write_json(json_data=cs_json, output_file_path='../maps', output_file_name='city_sections.geojson',
           var_name = None)

city_sections


# remove the following street types:

AL: alley  
TRL: trail  
OP: overpass  
IS: interstate  
SR: state route  
RR: rail  
FLYOVER: flyover  
STCR: streetcar  
ET: extension  
RN: turn  
RP: highway ramps  
WKWY: walkways  

In [None]:
# street types that are dropped from the working set
street_type_to_remove = [
    'AL',       # alley
    'TRL',      # trail
    'OP',       # overpass
    'IS',       # interstate
    'SR',       # state route
    'RR',       # rail
    'FLYOVER',  # flyover
    'STCR',     # streetcar
    'ET',       # extension
    'RN',       # turn
    'RP',       # highway ramps
    'WKWY',     # walkways
]

In [None]:
# before...
gdf.shape

In [None]:
# what street types are left?
gdf['ord_street_type'].unique()

In [None]:
# remove...
# `~` is the boolean NOT for a pandas mask (numpy itself rejects unary minus on
# boolean arrays), so it is used here instead of `-`.
gdf = gdf.loc[~gdf['ord_street_type'].isin(street_type_to_remove), :].copy()

In [None]:
# after..
gdf.shape

In [None]:
# what street types are left?
gdf['ord_street_type'].unique()

# remove other short segments that appear to be invalid.

In [None]:
# uncomment to remove the erroneous segments
err_segments = pd.read_csv(filepath_or_buffer='../data/streets_to_remove.txt')


In [None]:
# examine the erroneous segments: pull the rows whose snd_id is listed in
# err_segments into a separate frame and summarize their lengths
tgdf = gdf.loc[gdf['snd_id'].isin(err_segments['snd_id']), :].copy()
tgdf['gis_seg_length'].describe()
# very short!

In [None]:
# check names...
tgdf['ord_stname_concat'].unique().tolist()

In [None]:
tgdf['segment_type'].unique()

In [None]:
tgdf.head()

In [None]:
# remove the erroneous segments
# `~` is the boolean NOT for a pandas mask (numpy itself rejects unary minus on
# boolean arrays), so it is used here instead of `-`.
gdf = gdf.loc[~gdf['snd_id'].isin(err_segments['snd_id']), :].copy()

In [None]:
# after
gdf.shape

In [None]:
# unique roads
gdf['ord_stname_unique'].unique().shape

In [None]:
# total miles in Seattle, based on select road ways
# (presumably gis_seg_length is in feet - TODO confirm against the source data)
feet_per_mile = 5280
gdf['gis_seg_length'].sum() / feet_per_mile

# save as a gdf - overwrite

In [None]:
# save the filtered working streets
write_gdf(
    gdf = gdf,
    output_file_path = rc.OUTPUT_FILE_PATH,
    output_file_name = rc.S02_SND_WORKING_OUT_FILE_NAME,
)