# Part 06: Prep data for export for analysis for v2
michael babb  
2024 11 24

In [1]:
# standard
import os
import re

In [2]:
# external
import geopandas as gpd
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# custom
import run_constants as rc
from utils import *

In [4]:
# process flow variables


# load the street network data

In [5]:
# full path to the working street network file; folder and file name come from run_constants (rc)
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S03_SND_WORKING_IN_FILE_NAME)

In [6]:
# read the street network file at fpn with geopandas
gdf = gpd.read_file(filename = fpn)

In [7]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry'],
      dtype='object')

In [8]:
# constant counter column: every row counts as one road
gdf['n_roads'] = 1

# group data by street type for use in version 2.0

In [9]:
# one row per distinct combination of street name, street type,
# full street name, and city portion
col_names = [
    'ord_street_name',
    'ord_street_type',
    'ord_stname_concat',
    'city_portion',
]
st_name_type_df = gdf[col_names].drop_duplicates()

In [10]:
# the number of unique streets
st_name_type_df.shape

(2455, 4)

In [11]:
# order by street name, then street type, then full street name
st_name_type_df = st_name_type_df.sort_values(
    by = ['ord_street_name', 'ord_street_type', 'ord_stname_concat']
)

In [12]:
st_name_type_df.head(n=10)

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,city_portion
16649,100TH,PL,NW 100TH PL,NW
25064,100TH,PL,SW 100TH PL,SW
3392,100TH,ST,N 100TH ST,N
1185,100TH,ST,NE 100TH ST,NE
1685,100TH,ST,NW 100TH ST,NW
1741,100TH,ST,S 100TH ST,S
967,100TH,ST,SW 100TH ST,SW
25063,101ST,PL,SW 101ST PL,SW
2324,101ST,ST,N 101ST ST,N
1785,101ST,ST,NW 101ST ST,NW


In [13]:
# id lookup for ord_street_name: one row per distinct name,
# numbered from 0 in sorted order
osn_df = (
    st_name_type_df[['ord_street_name']]
    .drop_duplicates()
    .sort_values(by = 'ord_street_name')
    .reset_index(drop = True)
)
osn_df['osn_id'] = range(len(osn_df))
osn_df.head()

Unnamed: 0,ord_street_name,osn_id
0,100TH,0
1,101ST,1
2,102ND,2
3,103RD,3
4,104TH,4


In [14]:
# id lookup for each distinct (ord_street_name, ord_street_type) pair,
# numbered from 0 in sorted order
col_names = ['ord_street_name', 'ord_street_type']
osn_ost_df = (
    st_name_type_df[col_names]
    .drop_duplicates()
    .sort_values(by = col_names)
    .reset_index(drop = True)
)
# label of the form "<name> <type>"
osn_ost_df['osn_ost'] = (
    osn_ost_df['ord_street_name'] + ' ' + osn_ost_df['ord_street_type']
)
osn_ost_df['osn_ost_id'] = range(len(osn_ost_df))
osn_ost_df.head()


Unnamed: 0,ord_street_name,ord_street_type,osn_ost,osn_ost_id
0,100TH,PL,100TH PL,0
1,100TH,ST,100TH ST,1
2,101ST,PL,101ST PL,2
3,101ST,ST,101ST ST,3
4,102ND,LN,102ND LN,4


In [15]:
# id lookup for ord_stname_concat: one row per distinct full street name,
# numbered from 0 in sorted order
ostc_df = (
    st_name_type_df[['ord_stname_concat']]
    .drop_duplicates()
    .sort_values(by = 'ord_stname_concat')
    .reset_index(drop = True)
)
ostc_df['ostc_id'] = range(len(ostc_df))
ostc_df.head()


Unnamed: 0,ord_stname_concat,ostc_id
0,10TH AVE,0
1,10TH AVE E,1
2,10TH AVE NE,2
3,10TH AVE NW,3
4,10TH AVE S,4


In [16]:
# joins: attach the three id lookups. No `on` is given, so each merge
# matches on the column names the two frames have in common.
st_name_type_df = (
    st_name_type_df
    .merge(right = osn_df)
    .merge(right = osn_ost_df)
    .merge(right = ostc_df)
)



In [17]:
st_name_type_df.shape

(2455, 8)

In [18]:
st_name_type_df.head()

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,city_portion,osn_id,osn_ost,osn_ost_id,ostc_id
0,100TH,PL,NW 100TH PL,NW,0,100TH PL,0,1450
1,100TH,PL,SW 100TH PL,SW,0,100TH PL,0,2041
2,100TH,ST,N 100TH ST,N,0,100TH ST,1,1127
3,100TH,ST,NE 100TH ST,NE,0,100TH ST,1,1253
4,100TH,ST,NW 100TH ST,NW,0,100TH ST,1,1451


In [19]:
st_name_type_df['ord_street_type'].unique()

array(['PL', 'ST', 'LN', 'WAY', 'AVE', 'CT', 'RD', 'BR', 'TER', 'CIR',
       'DR', 'PKWY', 'BLVD', 'PZ', 'VIEW', 'LOOP'], dtype=object)

In [20]:
st_name_type_df['city_portion'].unique()

array(['NW', 'SW', 'N', 'NE', 'S', 'CNTR', 'E', 'W', 'WB'], dtype=object)

In [21]:
# rows whose city portion is 'WB'
check = st_name_type_df.loc[st_name_type_df['city_portion'].eq('WB')]

In [23]:
check

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,city_portion,osn_id,osn_ost,osn_ost_id,ostc_id
1799,MAGNOLIA,BR,MAGNOLIA BR WB,WB,727,MAGNOLIA BR,1066,1063


In [24]:
# let's connect the ord_street_name and the ord_street_type
st_name_type_df['osn_ost'].unique().shape

(1631,)

In [25]:
# LET'S COUNT HOW MANY STREETS WE NEED TO PLOT AND MANUALLY REVIEW

In [26]:
# constant counter column, summed by the pivot table
st_name_type_df['n_roads'] = 1

In [None]:
st_name_type_df.columns

In [28]:
# sum n_roads per (street name, street type) row and city_portion column;
# margins = True adds the 'All' totals row and column
pv_df = st_name_type_df.pivot_table(
    values = ['n_roads'],
    index = ['ord_street_name', 'ord_street_type'],
    columns = ['city_portion'],
    aggfunc = 'sum',
    fill_value = 0,
    margins = True,
)

In [None]:
pv_df.head()

In [30]:
# flatten the two-level column labels, keeping only the second element of each
pv_df.columns = [second_level for _first_level, second_level in pv_df.columns]

In [None]:
pv_df.columns

In [32]:
# move the pivot index (ord_street_name, ord_street_type) back into regular columns
pv_df = pv_df.reset_index()

In [None]:
pv_df.tail()

In [34]:
# keep rows whose 'All' total is at least 2
n_check = pv_df.loc[pv_df['All'].ge(2)].copy()

In [35]:
# the 'WB' city portion column is not carried forward
n_check = n_check.drop(columns = ['WB'])

In [36]:
# drop the 'All' totals row added by the pivot margins
n_check = n_check.loc[n_check['ord_street_name'].ne('All')]

In [None]:
n_check.head()

In [38]:
# back to long form: one row per (street name, street type, city_portion)
melt_df = n_check.melt(
    id_vars = ['ord_street_name', 'ord_street_type'],
    var_name = 'city_portion',
    value_name = 'n_roads',
)

In [39]:
# keep only combinations with a positive count
melt_df = melt_df.loc[melt_df['n_roads'].gt(0)].copy()

In [None]:
melt_df.head()

In [41]:
# the 'All' totals column became a city_portion value in the melt; remove it
melt_df = melt_df.loc[melt_df['city_portion'].ne('All')].copy()

In [42]:
# order by street name, street type, then city portion
melt_df = melt_df.sort_values(
    by = ['ord_street_name', 'ord_street_type', 'city_portion']
)

In [43]:
# the count has served its purpose as a filter; drop it
melt_df = melt_df.drop(columns = ['n_roads'])

In [None]:
melt_df.head()

In [45]:
def get_sort_order(sn):
    """Return a sort key for a street name.

    If the name contains digits, the first run of digits is returned,
    left-padded with zeros to at least three characters ('5TH' -> '005').
    A name with no digits is returned unchanged.
    """
    first_number = re.search(pattern=r'\d+', string = sn)
    if first_number is None:
        return sn

    return first_number.group(0).zfill(3)

In [46]:
# sort key per street name: zero-padded leading number for numbered streets,
# the name itself otherwise (see get_sort_order)
melt_df['sort_order'] = melt_df['ord_street_name'].map(get_sort_order)

In [None]:
melt_df.head()

In [48]:
# put sort_order first, then sort on all four columns
col_names = ['sort_order', 'ord_street_name', 'ord_street_type', 'city_portion']
melt_df = melt_df.loc[:, col_names].sort_values(by = col_names)

In [49]:
# all four columns become the index
melt_df = melt_df.set_index(keys = col_names)

In [None]:
melt_df.head()

In [None]:
melt_df.shape

In [52]:
# export the street groups; the relative file name resolves against the current working directory
melt_df.to_excel(excel_writer='street_groups.xlsx')

In [53]:
# make plots

In [54]:
# color list: pair each city portion with one Dark2 color, in dir_list order
dir_list = ['CNTR', 'E', 'N', 'NE', 'NW', 'S', 'SW', 'W']
cdm = dict(zip(dir_list, mpl.colormaps["Dark2"].colors))

# colormap holding the same colors in the same order
my_cmap = mpl.colors.ListedColormap([cdm[dir_value] for dir_value in dir_list])

In [55]:
# plot bounds to standardize the extent of every map
# order is [x min, y min, x max, y max]: the plotting loop passes
# bounds[0], bounds[2] to set_xlim and bounds[1], bounds[3] to set_ylim
# NOTE(review): values look like longitude / latitude in decimal degrees - confirm
bounds = [-122.4197794277490061,47.4803548409661005, -122.2200188105690017,47.7341482423694004]

In [56]:
# load the boundaries of the city sections
# folder and file name come from run_constants (rc); cs_gdf is drawn as the
# backdrop of each street map, colored by its 'city_portion' column
ifpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_CITY_SECTIONS_OUT_FILE_NAME)
cs_gdf = gpd.read_file(filename=ifpn)

In [57]:
# folder that receives the per-street PNG files
# NOTE(review): machine-specific absolute path - consider moving it to run_constants
output_path = 'H:/project/seattle_streets/print/individual_streets'

In [58]:
# turn the four index levels (sort_order, street name, street type, city portion) back into columns
melt_df = melt_df.reset_index()

In [None]:
melt_df.head()

In [None]:
osn_ost_df.shape

In [61]:
# one row per distinct (street name, street type) pair to plot
bk_df = melt_df.loc[:, ['ord_street_name', 'ord_street_type']].drop_duplicates()

In [None]:
bk_df.shape

In [63]:
# small subset (rows at positions 1 through 9) for trial runs;
# the plotting loop in this notebook iterates bk_df, not this subset
t_bk_df = bk_df.iloc[1:10]

In [None]:
t_bk_df.head()

In [65]:
# Render one PNG per (street name, street type) pair: the city sections as a
# faint backdrop, the street's segments on top colored and labeled by city portion.
# Disabled by default; set make_plots to True to regenerate the images.
make_plots = False
if make_plots:
    # make sure the target folder exists so savefig does not fail on the first file
    os.makedirs(output_path, exist_ok = True)

    street_pairs = bk_df[['ord_street_name', 'ord_street_type']].itertuples(index = False, name = None)
    for osn, ost in street_pairs:
        output_file_name = f"{osn}_{ost}.png"
        ofpn = os.path.join(output_path, output_file_name)
        print(ofpn)

        # every segment with this street name and type, merged to one geometry per city portion
        wgdf = gdf.loc[(gdf['ord_street_name'] == osn) &
                       (gdf['ord_street_type'] == ost), :]
        to_draw = wgdf[['city_portion', 'geometry']].dissolve(by = 'city_portion', as_index = False)
        # label anchor: centroid of each dissolved geometry
        to_draw['coords'] = to_draw['geometry'].map(lambda x: x.centroid.coords[0])

        fig = plt.figure(layout = 'constrained', figsize = (5, 10))
        gs = GridSpec(1, 1, figure = fig, height_ratios = [1])
        ax1 = fig.add_subplot(gs[0, 0])
        # same extent for every street so the maps are comparable
        ax1.set_xlim(bounds[0], bounds[2])
        ax1.set_ylim(bounds[1], bounds[3])
        cs_gdf.plot(ax = ax1, column = 'city_portion', alpha = .2)
        # NOTE(review): with column= and cmap=, colors appear to be spread over the
        # categories present in to_draw, so a city portion may not get the same
        # color in every plot - confirm whether cdm was meant to pin the colors
        to_draw.plot(ax = ax1, column = 'city_portion', cmap = my_cmap, linewidth = 5, legend = True)

        # separate loop names so the street name / type of the outer loop are never rebound
        for portion_label, label_xy in zip(to_draw['city_portion'], to_draw['coords']):
            ax1.annotate(text = portion_label, xy = label_xy, fontsize = 16)

        ax1.set_axis_off()
        ax1.set_title(label = f"Street Name: {osn} {ost}")

        #plt.show()

        fig.savefig(fname = ofpn)
        # close this figure explicitly so figures do not pile up across iterations
        plt.close(fig)

In [66]:
# load the saved excel file with the manually created groups

In [67]:
# workbook holding the manually created groups; the relative name resolves against the current working directory
file_name = 'street_groups_working.xlsx'

In [68]:
# names of the four leading columns of the workbook
# NOTE(review): not referenced by the cells visible in this notebook - the read below selects the index by position
index_cols = ['sort_order', 'ord_street_name', 'ord_street_type', 'city_portion']

In [69]:
# read the workbook, taking its first four columns (by position) as the row index
sg_df = pd.read_excel(io = file_name, index_col=[0, 1, 2, 3])

In [None]:
sg_df.head()

In [71]:
# turn the four index levels back into regular columns so they can serve as join keys
sg_df = sg_df.reset_index()

In [None]:
sg_df.head()

In [None]:
gdf.shape

In [74]:
# attach the manually assigned groups to every street segment.
# The join keys are spelled out so that a column added to the hand-edited
# workbook can never silently become a key, and validate raises a MergeError
# if the workbook repeats a (name, type, city portion) combination, which
# would otherwise duplicate road segments in the output.
gdf = pd.merge(left = gdf, right = sg_df, how = 'left',
               on = ['ord_street_name', 'ord_street_type', 'city_portion'],
               validate = 'many_to_one')

In [75]:
# workbook bookkeeping columns that should not go into the exported network
drop_cols = ['sort_order', 'progress', 'count']
gdf = gdf.drop(columns = drop_cols)

In [76]:
# rows with no group_id after the left merge get the default group 1
gdf['group_id'] = gdf['group_id'].fillna(1)

In [None]:
rc.S03_SND_WORKING_IN_FILE_NAME

In [None]:
gdf.shape

In [79]:
# save the network with group ids under the v2 working file name
# NOTE(review): write_gdf presumably comes from `from utils import *` - confirm;
# the output file name is hard-coded here rather than taken from run_constants
write_gdf(gdf = gdf, output_file_path=rc.OUTPUT_FILE_PATH, output_file_name='Street_Network_Database_Seattle_working_v2.gpkg')

In [80]:
# is every state name represented as a street?

In [83]:
# workbook of state names and codes
# NOTE(review): machine-specific absolute path - consider moving it to run_constants
file_path = 'H:/data/census_geography/states/state_state_code.xlsx'

In [84]:
# read the state workbook with default read_excel options
state_df = pd.read_excel(io = file_path)

In [93]:
# keep the rows flagged with State == 1
state_df = state_df.loc[state_df['State'].eq(1)]

In [97]:
# lower-cased, whitespace-trimmed state names (the column label 'Name ' ends with a space)
s_set = {state_name.strip().lower() for state_name in state_df['Name ']}

In [None]:
s_set

In [99]:
# lower-cased street names, for comparison against the lower-cased state names
street_name = set(gdf['ord_street_name'].str.lower())

In [100]:
# state names that also occur as a street name
testo = s_set & street_name

In [None]:
len(testo)

In [None]:
'york' in street_name

In [None]:
testo