# Part 01: Import and export the street data
michael babb  
2025 09 22

In [1]:
# standard
import os

In [2]:
# external
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd

In [3]:
# custom
from geo_data_io.fc_df_spatial import write_gdf
import run_constants as rc
from utils import create_city_sector, create_name

# load the street network shapefile as a GeoDataFrame

In [4]:
# set up the output directories.
# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() first.
# The analysis output directory is created here as well because the blank
# street type Excel file is written there at the end of the notebook.
for output_path in (rc.OUTPUT_FILE_PATH, rc.ANALYSIS_OUTPUT_FILE_PATH):
    os.makedirs(output_path, exist_ok=True)

In [5]:
# build the full path to the input street network file
fpn = os.path.join(rc.INPUT_FILE_PATH, rc.S01_SND_IN_FILE_NAME)

In [6]:
# read the street network into a GeoDataFrame
gdf = gpd.read_file(filename = fpn)

  return ogr_read(


In [7]:
# number of rows and columns that were read
gdf.shape

(34378, 37)

# lower case and rename columns

In [8]:
# normalize every column name to lower case
gdf.columns = list(map(str.lower, gdf.columns))

In [9]:
# confirm the column names are now lower case
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'geometry'],
      dtype='object')

In [10]:
# inspect the data type of each column
gdf.dtypes

f_intr_id                          int32
t_intr_id                          int32
snd_id                             int32
snd_feacode                        int32
citycode                           int32
stname_id                          int32
st_code                            int32
arterial_code                      int32
segment_type                       int32
agency_code                        int32
access_code                        int32
divided_code                       int32
structure_type                     int32
legalloc_code                      int32
vehicle_use_code                   int32
gis_seg_length                   float64
l_adrs_from                        int32
l_adrs_to                          int32
r_adrs_from                        int32
r_adrs_to                          int32
ord_pre_dir                       object
ord_street_name                   object
ord_street_type                   object
ord_suf_dir                       object
ord_stname_conca

# change datatypes

In [11]:
# widen every 32-bit integer column to a 64-bit integer column.
# (a stray bare `print` expression, which does nothing in Python 3, used to
# sit inside this loop; it has been removed.)
for cn in gdf.columns:
    if gdf[cn].dtype == np.int32:
        gdf[cn] = gdf[cn].astype(np.int64)

# indicate the sector of the city the street is in

In [12]:
# combine the ord_pre_dir and the ord_suf_dir to get the sector of the city.

In [13]:
# the prefix and suffix direction columns that are combined into the city sector
col_names = ['ord_pre_dir', 'ord_suf_dir']

In [14]:
# swap missing direction values for empty strings in both columns at once
gdf[col_names] = gdf[col_names].fillna('')

In [15]:
# apply the create_city_sector helper (imported from utils) to every row
gdf['city_sector'] = gdf.apply(func = create_city_sector, axis = 1)

In [16]:
# list the distinct city sector values
gdf['city_sector'].unique()

array(['S', 'E', 'NE', 'NW', 'N', 'CNTR', 'SW', 'W'], dtype=object)

## note: I used QGIS to manually recode the following streets:
MAGNOLIA BR WB: W  
WEST SEATTLE BR WB: mix of SW and S  
WEST SEATTLE BR EB: mix of SW and S  
1ST AV S BR SB: mix of SW and S  
1ST AV S BR NB: mix of SW and S  
(it's a lot easier to do this in QGIS)

In [17]:
# collect the distinct street names whose city_sector value ends in 'B'
# NOTE(review): `check` is assigned here but is not displayed or used in the visible cells
check = gdf.loc[gdf['city_sector'].str[-1] == 'B', 'ord_stname_concat'].unique().tolist()

In [18]:
# let's create a unique name field

In [19]:
# apply the create_name helper (imported from utils) to every row
gdf['ord_stname_unique'] = gdf.apply(func = create_name, axis = 1)

In [20]:
# count of distinct values in ord_stname_concat
gdf['ord_stname_concat'].unique().shape

(3692,)

In [21]:
# count of distinct values in ord_stname_unique
gdf['ord_stname_unique'].unique().shape

(3696,)

In [22]:
# let's check what type of geometry we are working with
# by tallying the records per geometry type
gdf['geometry'].geom_type.value_counts()

MultiLineString    34378
Name: count, dtype: int64

In [23]:
# everything is a MultiLineString??? is that necessary?
# we can call the gdf.explode() function to convert MultiLineStrings to LineStrings
# if the record counts are the same, we can convert everything to LineStrings
test_gdf = gdf.explode()

In [24]:
# record and column counts after exploding, for comparison with gdf
test_gdf.shape

(34378, 39)

In [25]:
# nope! MultiLineStrings are not necessary as long as exploding them leaves the
# number of records unchanged. Check that in code rather than by eye, so that a
# different input file cannot silently introduce split segments.
assert test_gdf.shape[0] == gdf.shape[0], 'explode() changed the number of records'
gdf = test_gdf.copy()

In [26]:
# tally the geometry types again after the conversion
gdf['geometry'].geom_type.value_counts()

LineString    34378
Name: count, dtype: int64

# write the geodataframe to disk as a gpkg

In [27]:
# reproject the street segments to WGS 84 (EPSG:4326)
gdf = gdf.to_crs(epsg = 4326)

In [28]:
# save the reprojected GeoDataFrame using the project's write_gdf helper
write_gdf(gdf = gdf, output_file_path=rc.OUTPUT_FILE_PATH,
           output_file_name=rc.S01_SND_OUT_FILE_NAME)

# export the blank street types to an Excel file for manual inspection

In [29]:
# list the distinct street type values, including any missing entries
gdf['ord_street_type'].unique()

array(['AVE', 'CT', 'ST', 'PL', 'LN', 'RD', 'DR', 'WAY', 'PKWY', 'BLVD',
       'CIR', 'TER', None, 'AL', 'RP', 'LOOP', 'OP', 'TRL', 'BR', 'ET',
       'VIEW', 'HWY', 'PZ', 'MALL'], dtype=object)

In [30]:
# one row per distinct street name that has no street type
blank_street_type_df = (
    gdf.loc[gdf['ord_street_type'].isna(), 'ord_stname_concat']
    .drop_duplicates()
    .to_frame()
)

In [31]:
# number of distinct street names with a blank street type
blank_street_type_df.shape

(80, 1)

In [32]:
# build the full path to the Excel output file
ofpn = os.path.join(rc.ANALYSIS_OUTPUT_FILE_PATH, rc.S01_BST_OUT_FILE_NAME)

In [33]:
# write the blank street type names to the Excel file, leaving out the index
blank_street_type_df.to_excel(excel_writer=ofpn, index = False)