## Part 02: Export nodes and select working streets
michael babb  
2024 11 24

In [None]:
# standard
import os
import collections

In [None]:
# external
from concave_hull import concave_hull
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from shapely import Point, MultiPoint

In [None]:
# custom
import run_constants as rc
from utils import create_name, generate_street_end_vertices, write_gdf, write_json, keep_largest_geometry

# load the street network

In [None]:
# path to the street network file; note that it is read from the OUTPUT folder
# NOTE(review): presumably written by the previous step of the workflow - confirm
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_SND_IN_FILE_NAME)

In [None]:
# read the street network from disk into gdf
gdf = gpd.read_file(filename = fpn)

In [None]:
# (rows, columns) of the street network as loaded
gdf.shape

In [None]:
# how many distinct ord_stname_unique values are there?
gdf['ord_stname_unique'].unique().shape

# generate street end vertices - these will be helpful with identification in subsequent steps

In [None]:
# can we generate the vertices? yes... but first check what type of geometry we are working with
# use the vectorized geom_type accessor (the same check is repeated after the explode below)
gdf['geometry'].geom_type.value_counts()

In [None]:
# everything is a MultiLineString??? is that necessary?
# we can call the gdf.explode() function to convert MultiLineStrings to LineStrings
# if the record counts are the same, we can convert everything to LineStrings
test_gdf = gdf.explode()

In [None]:
# (rows, columns) after the explode - compare the row count with gdf.shape
test_gdf.shape

In [None]:
# the number of records is the same, so the MultiLineStrings are not necessary:
# keep the exploded version as the working streets
gdf = test_gdf.copy()

In [None]:
# confirm the geometry types after the explode
gdf['geometry'].geom_type.value_counts()

In [None]:
# the helper returns two objects: the streets (re-bound to gdf) and the
# nodes (node_gdf), which are exported in the next cell
# NOTE(review): what the helper changes on the streets is not visible here - see utils
gdf, node_gdf = generate_street_end_vertices(gdf = gdf)

In [None]:
# export the nodes
write_gdf(
    gdf=node_gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name=rc.S02_NODE_OUT_FILE_NAME,
)

# import the manually classified streets to update streets without a classification. 

In [None]:
# path to the manually classified street types (read from the INPUT folder)
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_BST_IN_FILE_NAME)

In [None]:
# load the manual street type classifications from the Excel file
blank_street_type_df = pd.read_excel(io = fpn)

In [None]:
# preview the first rows of the manual classifications
blank_street_type_df.head()

In [None]:
# (rows, columns) of the manual classifications
blank_street_type_df.shape

In [None]:
# list the street columns: the merge below has no `on=`, so it joins on the
# column names that the two tables share
gdf.columns

In [None]:
# attach the manual classifications to the streets.
# no `on=` is given, so pandas joins on every column name the two tables share.
# how='outer' keeps unmatched rows from both sides; indicator=True adds a
# `_merge` column recording which table(s) each row came from.
test_join = pd.merge(left = gdf, right = blank_street_type_df, how = 'outer', indicator = True)

In [None]:
# how many rows matched ('both') and how many came from only one table?
test_join['_merge'].value_counts()

In [None]:
# accept the join result as the working streets
gdf = test_join.copy()

In [None]:
# where the street type is missing, take the manually classified value
missing_type = gdf['ord_street_type'].isna()
gdf.loc[missing_type, 'ord_street_type'] = gdf.loc[missing_type, 'ord_street_type_fix']

In [None]:
# which street types are present after the fill?
gdf['ord_street_type'].unique()

In [None]:
# how many street types are still missing (True) after the fill?
gdf['ord_street_type'].isna().value_counts()

In [None]:
# the helper columns from the join are no longer needed
gdf = gdf.drop(columns=['ord_street_type_fix', '_merge'])

# keep only streets in Seattle

In [None]:
# a street is kept when either its left or its right side is in Seattle
in_seattle = (gdf['l_city'] == 'SEATTLE') | (gdf['r_city'] == 'SEATTLE')
gdf = gdf.loc[in_seattle, :].copy()

In [None]:
# (rows, columns) after restricting to Seattle
gdf.shape

In [None]:
# how many distinct ord_stname_concat values remain?
gdf['ord_stname_concat'].unique().shape

# write the full seattle streets to disk

In [None]:
# export the full set of Seattle streets
write_gdf(
    gdf=gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name=rc.S02_SND_FULL_OUT_FILE_NAME,
)

# keep only streets with segment_type == 1  
These are actual streets.  
Every other segment_type is not a street (rail, trail, staircase, etc.).  
`segment_type == 15` is a stub, a dangling piece of a street, usually around 50 feet or less.  
Very short! There are 362 of them.

In [None]:
# how many segments are there of each segment_type?
gdf['segment_type'].value_counts()

In [None]:
# before: (rows, columns) prior to the segment_type filter
gdf.shape

In [None]:
# segment_type 1 marks the actual streets; everything else is dropped
is_street = gdf['segment_type'] == 1
gdf = gdf.loc[is_street, :].copy()
# after: (rows, columns) once only the streets remain
gdf.shape

In [None]:
# distinct ord_stname_unique values after the segment_type filter
gdf['ord_stname_unique'].unique().shape

# update the city sector variable

In [None]:
# we can quickly identify the central streets by performing some analysis in QGIS.
# rc.S02_CENTRAL_STREETS_IN_FILE_NAME is the result of that analysis, stored as a CSV file
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S02_CENTRAL_STREETS_IN_FILE_NAME)
# echo the path so the input file is visible in the notebook output
print(fpn)
cs_df = pd.read_csv(filepath_or_buffer=fpn)

In [None]:
# check the first rows of the central streets table
cs_df.head()

In [None]:
# join the central streets to the working streets.
# no `on=` is given, so pandas joins on every column name the two tables share;
# how='outer' keeps unmatched rows from both sides.
test_join = pd.merge(left = gdf, right = cs_df, how = 'outer', indicator = True)
# how many rows matched ('both') and how many came from only one table?
test_join['_merge'].value_counts()

In [None]:
# distinct ord_stname_unique values after the join
test_join['ord_stname_unique'].unique().shape

In [None]:
# list the joined columns; `_merge` and `temp_city_sector` are used in the next cells
test_join.columns

In [None]:
# for the rows found in both tables, overwrite city_sector with the
# temporary city sector that came from the central streets file
matched = test_join['_merge'] == 'both'
test_join.loc[matched, 'city_sector'] = test_join.loc[matched, 'temp_city_sector']

In [None]:
# create the unique name: create_name is applied to every row (axis = 1)
# NOTE(review): presumably re-run because the name depends on city_sector, which
# was just updated - confirm in utils
test_join['ord_stname_unique'] = test_join.apply(func = create_name, axis = 1)

In [None]:
# remove the join helper columns and make the result the working streets
drop_col_names = ['temp_city_sector', '_merge']
gdf = test_join.drop(columns=drop_col_names).copy()

# draw a convex hull and a concave hull around each group of city streets

In [None]:
# For every city sector, build two outlines around all of its street vertices:
# a convex hull and a concave hull. The attributes ([sector, hull type]) and the
# geometries are collected in two parallel lists.
data_list = []
geom_list = []
# skip missing sector labels: a missing value never compares equal to itself,
# so the row filter below would select nothing and the hulls would get no points
for cp in gdf['city_sector'].dropna().unique().tolist():
    print(cp)
    temp_geoms = gdf.loc[gdf['city_sector'] == cp, 'geometry']

    # every vertex of every line in the sector, as coordinate tuples
    point_list = [
        curr_coords
        for curr_line in temp_geoms
        for curr_coords in curr_line.coords
    ]

    # convex hull: MultiPoint accepts the coordinate tuples directly, so there
    # is no need to build one Point object per vertex first
    cnvx_hull = shapely.convex_hull(geometry=MultiPoint(point_list))
    data_list.append([cp, 'convex'])
    geom_list.append(cnvx_hull)

    # concave hull: concave_hull's result is passed to shapely.Polygon as the shell
    cncv_hull = concave_hull(points=point_list)
    cncv_poly = shapely.Polygon(cncv_hull)
    data_list.append([cp, 'concave'])
    geom_list.append(cncv_poly)

In [None]:
# combine the hull attributes and geometries into a GeoDataFrame
# NOTE(review): the CRS is hard-coded to EPSG:4326 - assumes the streets use it; confirm
output_gdf = gpd.GeoDataFrame(
    data=data_list,
    columns=['city_sector', 'hull_type'],
    geometry=geom_list,
    crs='epsg:4326',
)
# repair any invalid hull geometries
output_gdf['geometry'] = output_gdf.geometry.make_valid()

In [None]:
# now, let's project to WGS 84 UTM Zone 10N to perform some area calculations

In [None]:
# EPSG:32610 is WGS 84 / UTM zone 10N
output_gdf = output_gdf.to_crs(epsg=32610)

In [None]:
# preview the projected hulls
output_gdf.head()

In [None]:
# applied per (city_sector, hull_type) group
# NOTE(review): presumably keeps only the largest piece when a hull consists of
# several geometries (e.g. after make_valid) - confirm in utils
output_gdf = keep_largest_geometry(gdf = output_gdf, group_col_names=['city_sector', 'hull_type'])

In [None]:
# preview the hulls after keep_largest_geometry
output_gdf.head()

In [None]:
# re-project back to WGS 84 (EPSG:4326), the CRS the hulls were created in
output_gdf = output_gdf.to_crs(epsg = 4326)

In [None]:
# save the city sector hulls
write_gdf(
    gdf=output_gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name=rc.S02_CITY_SECTORS_OUT_FILE_NAME,
)

In [None]:
# replace each hull with its boundary so the sectors can also be exported as lines
hull_outlines = output_gdf['geometry'].boundary
output_gdf['geometry'] = hull_outlines.make_valid()

In [None]:
# save the line version of the city sector hulls
write_gdf(
    gdf=output_gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name=rc.S02_CITY_SECTORS_LINES_OUT_FILE_NAME,
)

# remove the following street types:

AL: alley  
TRL: trail  
OP: overpass  
IS: interstate  
SR: state route  
RR: rail  
FLYOVER: flyover  
STCR: streetcar  
ET: extension  
RN: turn  
RP: highway ramps  
WKWY: walkways  

In [None]:
# street types that are excluded from the working streets
street_type_to_remove = [
    'AL',       # alley
    'TRL',      # trail
    'OP',       # overpass
    'IS',       # interstate
    'SR',       # state route
    'RR',       # rail
    'FLYOVER',  # flyover
    'STCR',     # streetcar
    'ET',       # extension
    'RN',       # turn
    'RP',       # highway ramps
    'WKWY',     # walkways
]

In [None]:
# before: (rows, columns) prior to removing the street types
gdf.shape

In [None]:
# what street types are present before the removal?
gdf['ord_street_type'].unique()

In [None]:
# remove the streets whose type is in street_type_to_remove
# `~` is the logical NOT for a boolean mask (unary `-` is arithmetic negation)
gdf = gdf.loc[~gdf['ord_street_type'].isin(street_type_to_remove), :].copy()

In [None]:
# after: (rows, columns) once the street types are removed
gdf.shape

In [None]:
# what street types are left after the removal?
gdf['ord_street_type'].unique()

# remove other short segments that appear to be invalid.

In [None]:
# load the list of erroneous segments (matched on snd_id in the cells below)
# NOTE(review): this path is hard-coded, while the other inputs are built from rc.INPUT_FILE_PATH
err_segments = pd.read_csv(filepath_or_buffer='../data/streets_to_remove.txt')


In [None]:
# pull out the segments flagged as erroneous and summarize their lengths
flagged = gdf['snd_id'].isin(err_segments['snd_id'])
tgdf = gdf.loc[flagged, :].copy()
tgdf['gis_seg_length'].describe()
# very short!

In [None]:
# check the street names of the flagged segments
tgdf['ord_stname_concat'].unique().tolist()

In [None]:
# which segment types do the flagged segments have?
tgdf['segment_type'].unique()

In [None]:
# preview the flagged segments
tgdf.head()

In [None]:
# remove the erroneous segments (matched on snd_id)
# `~` is the logical NOT for a boolean mask (unary `-` is arithmetic negation)
gdf = gdf.loc[~gdf['snd_id'].isin(err_segments['snd_id']), :].copy()

In [None]:
# after: (rows, columns) once the erroneous segments are removed
gdf.shape

In [None]:
# unique roads: number of distinct ord_stname_unique values
gdf['ord_stname_unique'].unique().shape

In [None]:
# total miles in Seattle, based on select road ways
# 5280 = feet per mile; assumes gis_seg_length is measured in feet - TODO confirm
gdf['gis_seg_length'].sum() / 5280

# save as a gdf - overwrite

In [None]:
# save the working streets
write_gdf(
    gdf=gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name=rc.S02_SND_WORKING_OUT_FILE_NAME,
)