In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?
# step 01: import data / perform some initial formatting

In [2]:
# standard
import os

In [3]:
# external
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd

# load the street network shapefile as a GeoDataFrame

In [4]:
# directories used by this notebook
# where outputs are written
output_file_path = 'H:/project/seattle_streets/data/'
# where the input data is read from
input_file_path = 'H:/project/seattle_streets/data/WGS84/'

In [5]:
# name of the input shapefile
file_name = 'Street_Network_Database.shp'

In [6]:
# full path to the input file: input directory + file name
fpn = os.path.join(input_file_path, file_name)

In [7]:
# read the file at fpn with geopandas; the result is kept in gdf
gdf = gpd.read_file(filename = fpn)

# LOWER CASE AND RENAME COLUMNS

In [8]:
# normalize every column name to lower case
gdf.columns = gdf.columns.str.lower()

In [9]:
# display the current column names
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacod', 'citycode',
       'stname_id', 'st_code', 'arterial_c', 'segment_ty', 'agency_cod',
       'access_cod', 'divided_co', 'structure_', 'legalloc_c', 'vehicle_us',
       'gis_seg_le', 'l_adrs_fro', 'l_adrs_to', 'r_adrs_fro', 'r_adrs_to',
       'ord_pre_di', 'ord_street', 'ord_stre_1', 'ord_suf_di', 'ord_stname',
       'l_city', 'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip',
       'sndseg_upd', 'compkey', 'comptype', 'unitid', 'unitid2', 'shape_leng',
       'geometry'],
      dtype='object')

In [10]:
# shape_leng is an artifact of the shapefile format, so remove that column
gdf = gdf.drop(columns = ['shape_leng'])

In [11]:
# mapping of current column name -> replacement column name
rename_dict = dict(
    gis_seg_le = 'gis_seg_length',
    l_adrs_fro = 'l_adrs_from',
    r_adrs_fro = 'r_adrs_from',
    ord_pre_di = 'ord_pre_dir',
    ord_stre_1 = 'ord_street_type',
    ord_suf_di = 'ord_suf_dir',
)

In [12]:
# shapefile field names are limited to 10 characters;
# apply rename_dict to give the affected columns their longer names
gdf = gdf.rename(mapper = rename_dict, axis = 'columns')

# ADD A COLUMN INDICATING WHICH PORTION OF THE CITY THE STREET IS IN

In [13]:
# combine the ord_pre_dir and the ord_suf_dir to get the portion of the city.

In [14]:
# the prefix- and suffix-direction columns
col_names = ['ord_pre_dir', 'ord_suf_dir']

In [15]:
# replace missing values in the listed columns with empty strings
gdf[col_names] = gdf[col_names].fillna('')

In [16]:
def create_city_portion(my_row:pd.Series):
    """
    Build the city-portion label for a single row.

    The row's 'ord_pre_dir' and 'ord_suf_dir' values are joined with one
    space and the result is stripped of leading/trailing whitespace, so a
    row with only one of the two set yields just that value and a row with
    two empty strings yields an empty string.

    :param my_row: row holding string values under 'ord_pre_dir' and
        'ord_suf_dir'
    :return: the joined and stripped string
    """
    directions = (my_row['ord_pre_dir'], my_row['ord_suf_dir'])
    return ' '.join(directions).strip()

In [17]:
# vectorized form of applying create_city_portion to every row:
# join the two direction columns with a space, then strip the padding
# that is left over when either side is an empty string.
# this avoids one python-level function call per row.
gdf['city_portion'] = (gdf['ord_pre_dir'] + ' ' + gdf['ord_suf_dir']).str.strip()

In [18]:
# list the distinct city_portion values
# NOTE(review): the displayed output includes 'NB', 'WB', 'SB', 'EB' and an
# empty string alongside the compass values - confirm these are expected
gdf['city_portion'].unique()

array(['SW', 'N', 'NE', 'S', 'W', 'E', '', 'NW', 'NB', 'WB', 'SB', 'EB'],
      dtype=object)

# WRITE THE GEODATAFRAME TO DISK AS GPKG

In [19]:
# output file name (the full path is built separately)
output_file_name = 'Street_Network_Database.gpkg'

In [20]:
# full output path: output directory + output file name
ofpn = os.path.join(output_file_path, output_file_name)

In [21]:
# reproject to wgs 84 (epsg:4326)
# NOTE(review): the input directory is named WGS84, so the data may already
# be in this crs - confirm whether this step is needed
gdf = gdf.to_crs(crs = 'epsg:4326')

In [22]:
# write gdf to ofpn using the GPKG driver
gdf.to_file(filename = ofpn, driver = 'GPKG', index = False)

# EXPORT THE BLANK STREET TYPES TO AN EXCEL FILE FOR MANUAL INSPECTION

In [23]:
# rows whose street type is missing
missing_street_type = gdf['ord_street_type'].isna()

# distinct street names of those rows, as a one-column dataframe
blank_street_type_df = (
    gdf.loc[missing_street_type, 'ord_stname']
    .drop_duplicates()
    .to_frame()
)

In [24]:
# output file name for the excel export (re-uses the output_file_name variable)
output_file_name = 'blank_street_type.xlsx'

In [25]:
# full output path: output directory + output file name
ofpn = os.path.join(output_file_path, output_file_name)

In [26]:
# write the street names to ofpn as an excel file, without the index column
blank_street_type_df.to_excel(excel_writer=ofpn, index = False)