# Part 06: Prep data for export for analysis for v2
michael babb  
2024 11 24

In [1]:
# standard
import os

In [2]:
# external
import geopandas as gpd
import matplotlib as mpl
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# custom
import run_constants as rc
from utils import *

# load the street network data

In [4]:
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S03_SND_WORKING_IN_FILE_NAME)

In [5]:
gdf = gpd.read_file(filename = fpn)

In [6]:
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry'],
      dtype='object')

In [7]:
gdf['n_roads'] = int(1)

# group data by street type for use in version 2.0

In [8]:
col_names = ['ord_street_name', 'ord_street_type', 'ord_stname_concat', 'city_portion']
st_name_type_df = gdf[col_names].drop_duplicates()

In [9]:
st_name_type_df.shape

(2455, 4)

In [10]:
st_name_type_df = st_name_type_df.sort_values(by = ['ord_street_name', 'ord_street_type', 'ord_stname_concat'])

In [11]:
st_name_type_df.head(n=10)

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,city_portion
16649,100TH,PL,NW 100TH PL,NW
25064,100TH,PL,SW 100TH PL,SW
3392,100TH,ST,N 100TH ST,N
1185,100TH,ST,NE 100TH ST,NE
1685,100TH,ST,NW 100TH ST,NW
1741,100TH,ST,S 100TH ST,S
967,100TH,ST,SW 100TH ST,SW
25063,101ST,PL,SW 101ST PL,SW
2324,101ST,ST,N 101ST ST,N
1785,101ST,ST,NW 101ST ST,NW


In [12]:
st_name_type_df.shape

(2455, 4)

In [13]:
# make some ids - ord_street_name
osn_df = st_name_type_df['ord_street_name'].drop_duplicates().to_frame().sort_values(by = 'ord_street_name').reset_index(drop = True)
osn_df['osn_id'] = range(0, osn_df.shape[0])
osn_df.head()

Unnamed: 0,ord_street_name,osn_id
0,100TH,0
1,101ST,1
2,102ND,2
3,103RD,3
4,104TH,4


In [14]:
# ord_street_name and ord_street_type
col_names = ['ord_street_name', 'ord_street_type']
osn_ost_df = st_name_type_df[col_names].drop_duplicates().sort_values(by = col_names).reset_index(drop = True)
osn_ost_df['osn_ost'] = osn_ost_df['ord_street_name'] + ' ' + osn_ost_df['ord_street_type']
osn_ost_df['osn_ost_id'] = range(0, osn_ost_df.shape[0])
osn_ost_df.head()


Unnamed: 0,ord_street_name,ord_street_type,osn_ost,osn_ost_id
0,100TH,PL,100TH PL,0
1,100TH,ST,100TH ST,1
2,101ST,PL,101ST PL,2
3,101ST,ST,101ST ST,3
4,102ND,LN,102ND LN,4


In [15]:
# ord_stname_concat
ostc_df = st_name_type_df['ord_stname_concat'].drop_duplicates().to_frame().sort_values(by = 'ord_stname_concat').reset_index(drop = True)
ostc_df['ostc_id'] = range(0, ostc_df.shape[0])
ostc_df.head()


Unnamed: 0,ord_stname_concat,ostc_id
0,10TH AVE,0
1,10TH AVE E,1
2,10TH AVE NE,2
3,10TH AVE NW,3
4,10TH AVE S,4


In [16]:
# joins
st_name_type_df = pd.merge(left = st_name_type_df, right = osn_df)
st_name_type_df = pd.merge(left = st_name_type_df, right = osn_ost_df)
st_name_type_df = pd.merge(left = st_name_type_df, right = ostc_df)



In [17]:
st_name_type_df.shape

(2455, 8)

In [18]:
st_name_type_df.head()

Unnamed: 0,ord_street_name,ord_street_type,ord_stname_concat,city_portion,osn_id,osn_ost,osn_ost_id,ostc_id
0,100TH,PL,NW 100TH PL,NW,0,100TH PL,0,1450
1,100TH,PL,SW 100TH PL,SW,0,100TH PL,0,2041
2,100TH,ST,N 100TH ST,N,0,100TH ST,1,1127
3,100TH,ST,NE 100TH ST,NE,0,100TH ST,1,1253
4,100TH,ST,NW 100TH ST,NW,0,100TH ST,1,1451


In [19]:
st_name_type_df['ord_street_type'].unique()

array(['PL', 'ST', 'LN', 'WAY', 'AVE', 'CT', 'RD', 'BR', 'TER', 'CIR',
       'DR', 'PKWY', 'BLVD', 'PZ', 'VIEW', 'LOOP'], dtype=object)

In [20]:
st_name_type_df['city_portion'].unique()

array(['NW', 'SW', 'N', 'NE', 'S', 'CNTR', 'E', 'W', 'WB'], dtype=object)

In [21]:
# let's connect the ord_street_name and the ord_street_type
st_name_type_df['osn_ost'].unique().shape

(1631,)

In [22]:
# LET'S COUNT HOW MANY STREETS WE NEED TO PLOT AND MANUALLY REVIEW

In [23]:
st_name_type_df['n_roads'] = int(1)

In [24]:
st_name_type_df.columns

Index(['ord_street_name', 'ord_street_type', 'ord_stname_concat',
       'city_portion', 'osn_id', 'osn_ost', 'osn_ost_id', 'ostc_id',
       'n_roads'],
      dtype='object')

In [25]:
pv_df = pd.pivot_table(data = st_name_type_df, values = ['n_roads'],
                          index = ['ord_street_name', 'ord_street_type'],
                          columns = ['city_portion'], aggfunc='sum', fill_value=0,
                          margins=True)

In [26]:
pv_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads,n_roads
Unnamed: 0_level_1,city_portion,CNTR,E,N,NE,NW,S,SW,W,WB,All
ord_street_name,ord_street_type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
100TH,PL,0,0,0,0,1,0,1,0,0,2
100TH,ST,0,0,1,1,1,1,1,0,0,5
101ST,PL,0,0,0,0,0,0,1,0,0,1
101ST,ST,0,0,1,0,1,1,1,0,0,4
102ND,LN,0,0,0,0,0,0,1,0,0,1


In [27]:
pv_df.columns = [cn[1] for cn in pv_df.columns]

In [28]:
pv_df.columns

Index(['CNTR', 'E', 'N', 'NE', 'NW', 'S', 'SW', 'W', 'WB', 'All'], dtype='object')

In [29]:
pv_df = pv_df.reset_index()

In [30]:
pv_df.tail()

Unnamed: 0,ord_street_name,ord_street_type,CNTR,E,N,NE,NW,S,SW,W,WB,All
1627,YESLER,WAY,1,1,0,0,0,0,0,0,0,2
1628,YESLER TERRACE ACC,RD,1,0,0,0,0,0,0,0,0,1
1629,YORK,RD,0,0,0,0,0,1,0,0,0,1
1630,YUKON,AVE,0,0,0,0,0,1,0,0,0,1
1631,All,,233,185,232,377,242,606,377,202,1,2455


In [31]:
n_check = pv_df.loc[(pv_df['All'] >= 2), :].copy()

In [32]:
n_check = n_check.drop(labels = ['WB'], axis = 1)

In [33]:
n_check = n_check.loc[n_check['ord_street_name'] != 'All', :]

In [34]:
n_check.head()

Unnamed: 0,ord_street_name,ord_street_type,CNTR,E,N,NE,NW,S,SW,W,All
0,100TH,PL,0,0,0,0,1,0,1,0,2
1,100TH,ST,0,0,1,1,1,1,1,0,5
3,101ST,ST,0,0,1,0,1,1,1,0,4
5,102ND,ST,0,0,1,1,0,1,1,0,4
6,103RD,PL,0,0,0,1,0,0,1,0,2


In [42]:
melt_df = pd.melt(frame = n_check, id_vars = ['ord_street_name', 'ord_street_type'],var_name='city_portion', value_name = 'n_roads')

In [43]:
melt_df = melt_df.loc[(melt_df['n_roads'] > 0), :].copy()

In [44]:
melt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_portion,n_roads
14,10TH,AVE,CNTR,1
30,11TH,AVE,CNTR,1
42,12TH,AVE,CNTR,1
55,13TH,AVE,CNTR,1
61,14TH,AVE,CNTR,1


In [45]:
melt_df = melt_df.loc[(melt_df['city_portion'] != 'All'), :].copy()

In [46]:
melt_df = melt_df.sort_values(by = ['ord_street_name', 'ord_street_type', 'city_portion'])

In [47]:
melt_df = melt_df.drop(labels = ['n_roads'], axis = 1)

In [48]:
melt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_portion
1636,100TH,PL,NW
2454,100TH,PL,SW
819,100TH,ST,N
1228,100TH,ST,NE
1637,100TH,ST,NW


In [82]:
'1' in '123'

True

In [95]:
import re

pattern = r'\d+'
test_string = "tuesday"

In [96]:
outcome = re.findall(pattern=pattern, string=test_string)

In [99]:
outcome

[]

In [101]:
def get_sort_order(sn):
    """Return the sort key for a street name.

    The first run of digits found in ``sn`` is returned as a string,
    left-padded with zeros to three characters (e.g. '1ST' -> '001').
    A name that contains no digits is returned unchanged.
    """
    first_number = re.search(r'\d+', sn)
    if first_number is None:
        # no digits at all: the name is its own sort key
        return sn
    return first_number.group(0).zfill(3)

In [102]:
# get the numeric value of the streets, for the sort order...
melt_df['sort_order'] = melt_df['ord_street_name'].map(get_sort_order)

In [103]:
melt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_portion,group_id,sort_order
0,100TH,PL,NW,,100
1,100TH,PL,SW,,100
2,100TH,ST,N,,100
3,100TH,ST,NE,,100
4,100TH,ST,NW,,100


In [107]:
col_names = ['sort_order', 'ord_street_name', 'ord_street_type', 'city_portion', 'group_id']
# No visible earlier cell creates 'group_id', so on a freshly restarted kernel
# the column selection below would raise KeyError. Add it as a blank column
# when it is absent.
# NOTE(review): blank (NaN) is assumed from the empty values shown in the
# saved output - confirm that is the intended placeholder.
if 'group_id' not in melt_df.columns:
    melt_df['group_id'] = np.nan
# keep only the listed columns, ordered by the first four of them
melt_df = melt_df[col_names].sort_values(by = col_names[:4])

In [108]:
melt_df = melt_df.set_index(keys = col_names[:4])

In [109]:
melt_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,group_id
sort_order,ord_street_name,ord_street_type,city_portion,Unnamed: 4_level_1
1,1ST,AVE,CNTR,
1,1ST,AVE,N,
1,1ST,AVE,NE,
1,1ST,AVE,NW,
1,1ST,AVE,S,


In [110]:
melt_df.shape

(1216, 1)

In [111]:
melt_df.to_excel(excel_writer='street_groups.xlsx')

In [53]:
# make plots

In [54]:
# color list: one "Dark2" color per city portion, taken in dir_list order
dir_list = ['CNTR', 'E', 'N', 'NE', 'NW', 'S', 'SW', 'W']
dark2_colors = mpl.colormaps["Dark2"].colors
cdm = {dir_value: dark2_colors[idir] for idir, dir_value in enumerate(dir_list)}

# the same colors, in the same order, as a colormap for plotting
my_cmap = mpl.colors.ListedColormap([cdm[c] for c in dir_list])

In [55]:
# plot bounds to standardize
bounds = [-122.4197794277490061,47.4803548409661005, -122.2200188105690017,47.7341482423694004]

In [56]:
# load the boundaries of the city sections
ifpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S02_CITY_SECTIONS_OUT_FILE_NAME)
cs_gdf = gpd.read_file(filename=ifpn)

In [57]:
output_path = 'H:/project/seattle_streets/print/individual_streets'

In [59]:
melt_df = melt_df.reset_index()

In [60]:
melt_df.head()

Unnamed: 0,ord_street_name,ord_street_type,city_portion,group_id
0,100TH,PL,NW,
1,100TH,PL,SW,
2,100TH,ST,N,
3,100TH,ST,NE,
4,100TH,ST,NW,


In [62]:
osn_ost_df.shape

(1631, 4)

In [64]:
bk_df = melt_df[['ord_street_name', 'ord_street_type']].drop_duplicates()

In [65]:
bk_df.shape

(409, 2)

In [78]:
t_bk_df = bk_df.iloc[1:10]

In [79]:
t_bk_df.head()

Unnamed: 0,ord_street_name,ord_street_type
2,100TH,ST
7,101ST,ST
11,102ND,ST
15,103RD,PL
17,103RD,ST


In [81]:
# Draw one map per (street name, street type) pair in bk_df and save it as a
# PNG named "<street name>_<street type>.png" under output_path.
for irow, bk_row in bk_df.iterrows():
    osn = bk_row['ord_street_name']
    ost = bk_row['ord_street_type']

    output_file_name = f"{osn}_{ost}.png"
    ofpn = os.path.join(output_path, output_file_name)
    print(ofpn)

    # now, let's make a map...
    wgdf = gdf.loc[(gdf['ord_street_name'] == osn) &
                (gdf['ord_street_type'] == ost), :]
    # merge the segments into one geometry per city portion
    to_draw = wgdf[['city_portion', 'geometry']].dissolve(by = 'city_portion', as_index = False)
    # label position: the centroid of each dissolved geometry
    to_draw['coords'] = to_draw['geometry'].map(lambda x: x.centroid.coords[0])
    fig = plt.figure(layout = 'constrained', figsize = (5, 10))
    gs = GridSpec(1,1, figure = fig, height_ratios = [1])
    ax1 = fig.add_subplot(gs[0,0])
    # same extent for every map so the images are comparable
    ax1.set_xlim(bounds[0], bounds[2])
    ax1.set_ylim(bounds[1], bounds[3])
    cs_gdf.plot(ax = ax1, column = 'city_portion', alpha = .2)
    to_draw.plot(ax = ax1, column = 'city_portion', cmap = my_cmap,  linewidth = 5, legend = True)

    # a separate loop variable keeps the outer row (bk_row) intact
    for _, draw_row in to_draw.iterrows():
        ax1.annotate(text=draw_row['city_portion'], xy=draw_row['coords'], fontsize = 16 )

    ax1.set_axis_off()

    # osn / ost are the values wgdf was filtered on, so they are used directly
    my_title = f"Street Name: {osn} {ost}"
    ax1.set_title(label = my_title)

    #plt.show()

    fig.savefig(fname = ofpn)
    # close this specific figure so figures do not accumulate across iterations
    plt.close(fig)

H:/project/seattle_streets/print/individual_streets\100TH_PL.png
H:/project/seattle_streets/print/individual_streets\100TH_ST.png
H:/project/seattle_streets/print/individual_streets\101ST_ST.png
H:/project/seattle_streets/print/individual_streets\102ND_ST.png
H:/project/seattle_streets/print/individual_streets\103RD_PL.png
H:/project/seattle_streets/print/individual_streets\103RD_ST.png
H:/project/seattle_streets/print/individual_streets\104TH_ST.png
H:/project/seattle_streets/print/individual_streets\105TH_PL.png
H:/project/seattle_streets/print/individual_streets\105TH_ST.png
H:/project/seattle_streets/print/individual_streets\106TH_ST.png
H:/project/seattle_streets/print/individual_streets\107TH_ST.png
H:/project/seattle_streets/print/individual_streets\108TH_PL.png
H:/project/seattle_streets/print/individual_streets\108TH_ST.png
H:/project/seattle_streets/print/individual_streets\109TH_ST.png
H:/project/seattle_streets/print/individual_streets\10TH_AVE.png
H:/project/seattle_street

In [53]:
wgdf = gdf.loc[gdf['ord_street_name'] == '51ST', :]

In [None]:
wgdf.plot()

In [None]:
# VERSION 2.0
# We only really have to do this for ST and AVE.
# TODO: finish this note - the original sentence ("I can ...") was left incomplete.

In [None]:
# VERSION 1.0: This works, but it's not perfect. It needs some tweaking, and there are too many exceptions.

In [None]:
def select_rows_by_set_membership(df:pd.DataFrame, curr_set:set):
    """Return the rows of ``df`` that belong to ``curr_set`` and are ungrouped.

    A row is selected when its 'city_portion' value is a member of
    ``curr_set`` and its 'group_id' equals -1 (the value the grouping cell
    assigns to every row before any group is found).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'city_portion' and 'group_id'.
    curr_set : set
        City portions to select.

    Returns
    -------
    pd.DataFrame
        The matching rows; empty when nothing matches.
    """
    out_df = df.loc[(df['city_portion'].isin(curr_set)) &
                       (df['group_id'] == -1), :]
    # the original built out_df but never returned it
    return out_df

In [85]:
def find_street_pattern(my_combo:list, df:pd.DataFrame, city_portion_set:set, group_id:int):
    """Group the rows of ``df`` that match the largest usable part of ``my_combo``.

    Sub-combinations of ``my_combo`` are tried from longest to shortest. The
    first one whose members are all in ``city_portion_set`` and that still has
    ungrouped rows (``group_id == -1``) in ``df`` is used: those rows receive
    ``group_id`` and a 'group_type' label made of the city portions joined
    with '_' in ``my_combo`` order. At most one group is assigned per call.

    Parameters
    ----------
    my_combo : list
        City portions that may be joined together.
    df : pd.DataFrame
        Must contain the columns 'city_portion' and 'group_id'. It is
        modified in place and also returned.
    city_portion_set : set
        City portions present for the street being processed.
    group_id : int
        Id to give to the next group.

    Returns
    -------
    tuple
        ``(df, group_id)``; ``group_id`` is one higher than the value passed
        in when a group was assigned, otherwise unchanged.
    """
    # imported locally: no visible cell of the notebook imports `combinations`
    from itertools import combinations

    for ii in range(len(my_combo), 0, -1):
        for curr_combo in combinations(my_combo, r = ii):
            if not set(curr_combo).issubset(city_portion_set):
                continue
            # rows in this combination that have not been given a group yet
            ungrouped = df['city_portion'].isin(curr_combo) & (df['group_id'] == -1)
            if not ungrouped.any():
                continue
            # join the tuple, not a set, so the label order is the same on every run
            df.loc[ungrouped, 'group_type'] = '_'.join(curr_combo)
            df.loc[ungrouped, 'group_id'] = group_id
            # only one pattern is assigned per call
            return df, group_id + 1

    return df, group_id

In [86]:
# Give every row of st_name_type_df a group_id / group_type.
# City portions a street name may be joined across, per street type; street
# types not listed here get one group per ord_stname_concat.
joinable_portions = {
    # streets run east / west: join east-west
    'ST': [
        ['NW', 'N', 'NE'],
        ['W', 'N', 'CNTR', 'E'],
        ['SW', 'S'],
    ],
    # avenues run north / south: join north-south
    'AVE': [
        ['NW', 'W', 'SW'],
        ['N', 'CNTR', 'SW'],
        ['N', 'CNTR', 'S'],
        ['NE', 'E', 'S'],
    ],
}

t_osn_ost_df = osn_ost_df.loc[osn_ost_df['osn_ost_id'] == 1, :]
output_list = []
group_id = 0
# -1 / '' mark rows that have not been assigned to a group yet
st_name_type_df['group_id'] = -1
st_name_type_df['group_type'] = ''
for osn_ost_id in osn_ost_df['osn_ost_id'].unique():
    is_curr_street = st_name_type_df['osn_ost_id'] == osn_ost_id
    t1_df = st_name_type_df.loc[is_curr_street, :].copy()
    for ost in t1_df['ord_street_type'].unique():
        # this is all streets of type OST, could be ord_stname_concat
        t2_df = t1_df.loc[t1_df['ord_street_type'] == ost, :].copy()
        city_portion_set = set(t2_df['city_portion'].unique().tolist())
        if ost in joinable_portions:
            # try each joinable combination in turn; group_id carries over
            for my_combo in joinable_portions[ost]:
                t2_df, group_id = find_street_pattern(my_combo=my_combo,
                                                        df = t2_df,
                                                        city_portion_set=city_portion_set,
                                                        group_id=group_id)
            output_list.append(t2_df)
            continue
        # every other street type: each full street name is its own group
        for ostc in t2_df['ord_stname_concat'].unique():
            t3_df = t2_df.loc[t2_df['ord_stname_concat'] == ostc, :].copy()
            t3_df['group_id'] = group_id
            t3_df['group_type'] = t3_df['city_portion']
            group_id += 1
            output_list.append(t3_df)
                        
                            
                                

In [None]:
len(output_list)

In [88]:
testo = pd.concat(output_list)

In [None]:
testo.head()

In [None]:
testo.shape

In [None]:
testo['group_type'].unique().shape

In [None]:
gdf.shape

In [93]:
# join to the gdf on the street-identifying columns only. Without explicit
# keys pandas joins on every column the two frames share, which here also
# includes the constant helper column 'n_roads'. That column is dropped from
# the right side so the result keeps a single 'n_roads' (the one from gdf).
join_cols = ['ord_street_name', 'ord_street_type', 'ord_stname_concat', 'city_portion']
out_gdf = pd.merge(left = gdf,
                   right = testo.drop(columns = ['n_roads'], errors = 'ignore'),
                   on = join_cols,
                   # each gdf row must match at most one row of testo; raise
                   # instead of silently duplicating segments otherwise
                   validate = 'many_to_one')

In [None]:
out_gdf.shape

In [95]:
# save out_gdf via the project helper write_gdf (presumably into rc.OUTPUT_FILE_PATH).
# NOTE(review): the file name 'pv_dfregate.gpkg' looks like a find-and-replace
# artifact (possibly of 'aggregate.gpkg') - confirm what downstream steps
# expect before renaming it.
write_gdf(gdf = out_gdf, output_file_path=rc.OUTPUT_FILE_PATH, output_file_name='pv_dfregate.gpkg')

In [None]:
testo['ord_street_type'].unique()

In [None]:
out_gdf.columns

In [None]:
group_type = 'CNTR_S_N'
# match on the set of city portions in the label rather than on the exact
# string, so the rows are found regardless of the order in which the portions
# were joined
wanted_portions = set(group_type.split('_'))
is_wanted = out_gdf['group_type'].map(
    lambda gt: set(str(gt).split('_')) == wanted_portions
).astype(bool)
t_gdf = out_gdf.loc[is_wanted, :].copy()
t_gdf['city_portion'].unique()

# start writing down rules
## streets run east / west: join across
* NW, N, and NE
* W, N, CNTR, E
* SW, S
## avenues run north / south: join across
* NW, W, SW
* N, CNTR, SW
* N, CNTR, S
* NE, E, S

## Street types that do not connect across sections/quadrants
* PL

In [24]:
# left off here: 2025 01 09
# bring in the street directions and types  