# Part 04: Tabulate statistics on the street discontinuities
michael babb  
2024 11 24

In [1]:
# standard
import os

In [2]:
# external
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# custom
import run_constants as rc
from utils import *

# output

In [4]:
ofpn = os.path.join(rc.ANALYSIS_OUTPUT_FILE_PATH, rc.S05_ANALYSIS_OUT_FILE_NAME)

In [5]:
e_writer = pd.ExcelWriter(path = ofpn)

# load the street network data

In [6]:
fpn = os.path.join(rc.OUTPUT_FILE_PATH, rc.S05_MISSING_IN_FILE_NAME)

In [7]:
gdf = gpd.read_file(filename = fpn)

In [8]:
gdf.columns

Index(['snd_id', 'ord_street_name', 'ord_street_type', 'ord_stname_type',
       'ord_stname_concat', 'ord_stname_unique', 'ord_stname_type_group',
       'ord_street_type_rank', 'snd_group', 'group_id', 'city_portion',
       'city_portion_group', 'street_status', 'sn_id', 'en_id', 'dist',
       'dist_miles', 'geometry'],
      dtype='object')

In [9]:
gdf['n_segments'] = int(1)

In [10]:
# add text descriptions for the street type categories
# keys are the integer codes stored in gdf['street_status']; the values are
# mapped into gdf['street_status_desc'] and later become pivot-table column
# names, so any cell that matches on these labels must use the same spelling
# (both "added" labels share the 'added_street' prefix).
street_status_dict = {0:'continuous_street',
                1:'discontinuous_street',
                2:'added_street: intra-sector',
                3:'added_street: inter-sector'}

In [11]:
gdf['street_status_desc'] = gdf['street_status'].map(street_status_dict)

In [12]:
# number of segments
gdf['street_status_desc'].value_counts()

street_status_desc
discontinuous_street          21940
continuous_street              3818
added_street: intra-sector     3617
added_street: inter-sector      410
Name: count, dtype: int64

In [13]:
# total road miles for each street status
col_names = ['street_status_desc', 'dist_miles']
status_col, miles_col = col_names
miles_by_status = gdf[col_names].groupby([status_col])[miles_col].sum()
gdf_agg = miles_by_status.rename('n_miles').reset_index()

In [14]:
# total road miles
gdf_agg.head()

Unnamed: 0,street_status_desc,n_miles
0,added_street: inter-sector,823.728529
1,added_street: intra-sector,833.487569
2,continuous_street,312.350375
3,discontinuous_street,1578.369226


In [15]:
# keep one row per uniquely named road and status, then count roads per status
col_names = ['ord_street_name', 'ord_stname_concat', 'street_status_desc', 'n_segments']
unique_roads = gdf[col_names].drop_duplicates()
gdf_agg = (
    unique_roads
    .groupby(['street_status_desc'])
    .agg(n_roads = ('n_segments', 'sum'))
    .reset_index()
)

In [16]:
gdf_agg.head()

Unnamed: 0,street_status_desc,n_roads
0,added_street: inter-sector,241
1,added_street: intra-sector,1103
2,continuous_street,1147
3,discontinuous_street,1308


In [17]:
# number of added segment
gdf['street_status_desc'].value_counts()

street_status_desc
discontinuous_street          21940
continuous_street              3818
added_street: intra-sector     3617
added_street: inter-sector      410
Name: count, dtype: int64

In [18]:
# average number of intra-sector segments added per uniquely named road.
# Both counts are read from the data instead of being typed in by hand, so the
# ratio stays correct when the input file changes.
# - numerator: segments whose status label is the intra-sector "added" label
# - denominator: n_roads for that same label in the per-status road count
#   (gdf_agg as built in the cell that counts uniquely named roads)
intra_label = street_status_dict[2]
n_intra_segments = (gdf['street_status_desc'] == intra_label).sum()
n_intra_roads = gdf_agg.loc[gdf_agg['street_status_desc'] == intra_label, 'n_roads'].sum()
float(n_intra_segments / n_intra_roads)

3.2792384406165005

# export data for use in a web map

In [19]:
# dissolve - this also aggregates
# col_names[:-2] leaves out 'dist_miles' and 'geometry', so the dissolve keys
# are ord_stname_type_group, snd_group, street_status and group_id.
# aggfunc = ['sum', 'size'] is applied to the remaining 'dist_miles' column and
# produces two tuple-named columns: ('dist_miles', 'sum') and
# ('dist_miles', 'size'); they are renamed in a later cell.
col_names = ['ord_stname_type_group','snd_group', 'street_status', 'group_id', 'dist_miles', 'geometry']
diss_gdf = gdf[col_names].dissolve(by = col_names[:-2],
                     aggfunc =  ['sum', 'size'], as_index = False)

In [20]:
diss_gdf.head()

Unnamed: 0,ord_stname_type_group,snd_group,street_status,group_id,geometry,"(dist_miles, sum)","(dist_miles, size)"
0,100TH PL NW,0,0,1,"MULTILINESTRING ((-122.36206 47.7035, -122.363...",0.201049,3
1,100TH PL SW,1,0,2,"LINESTRING (-122.3403 47.51334, -122.34036 47....",0.035254,1
2,100TH ST NW_N_NE,0,2,1,"LINESTRING (-122.38387 47.70155, -122.3823 47....",0.072951,1
3,100TH ST NW_N_NE,1,2,1,"LINESTRING (-122.36478 47.70151, -122.36208 47...",0.125399,1
4,100TH ST NW_N_NE,2,1,1,"MULTILINESTRING ((-122.38757 47.70155, -122.38...",0.273002,3


In [21]:
diss_gdf.shape

(9870, 7)

In [22]:
# set column names
# the assignment is positional: it relies on the dissolve output order
# (four key columns, geometry, then the 'sum' and 'size' aggregates), so
# ('dist_miles', 'sum') becomes 'dist_miles' and ('dist_miles', 'size')
# becomes 'n_segments'.
col_names = ['ord_stname_type_group','snd_group', 'street_status', 'group_id', 'geometry', 'dist_miles', 'n_segments']
diss_gdf.columns = col_names

In [23]:
diss_gdf.head()

Unnamed: 0,ord_stname_type_group,snd_group,street_status,group_id,geometry,dist_miles,n_segments
0,100TH PL NW,0,0,1,"MULTILINESTRING ((-122.36206 47.7035, -122.363...",0.201049,3
1,100TH PL SW,1,0,2,"LINESTRING (-122.3403 47.51334, -122.34036 47....",0.035254,1
2,100TH ST NW_N_NE,0,2,1,"LINESTRING (-122.38387 47.70155, -122.3823 47....",0.072951,1
3,100TH ST NW_N_NE,1,2,1,"LINESTRING (-122.36478 47.70151, -122.36208 47...",0.125399,1
4,100TH ST NW_N_NE,2,1,1,"MULTILINESTRING ((-122.38757 47.70155, -122.38...",0.273002,3


In [24]:
diss_gdf['n_records'] = 1

In [25]:
diss_gdf['geometry'].geom_type.value_counts()

LineString         6145
MultiLineString    3725
Name: count, dtype: int64

In [26]:
diss_gdf['geometry'] = diss_gdf['geometry'].map(check_MultiLineStrings)

In [27]:
diss_gdf['geometry'].geom_type.value_counts()

LineString         9750
MultiLineString     120
Name: count, dtype: int64

In [28]:
diss_gdf.columns

Index(['ord_stname_type_group', 'snd_group', 'street_status', 'group_id',
       'geometry', 'dist_miles', 'n_segments', 'n_records'],
      dtype='object')

In [29]:
col_names = ['osntg','sndg', 'ss', 'gi', 'geometry', 'dm', 'ns', 'nr']

In [30]:
diss_gdf.columns = col_names

In [31]:
# let's try dropping some columns
drop_col_names = ['sndg', 'gi', 'ns']

In [32]:
diss_gdf = diss_gdf.drop(labels=drop_col_names, axis = 1)

In [33]:
# serialize the dissolved streets as GeoJSON text (feature ids dropped,
# coordinates converted to WGS84) and hand it to the write_json helper
# NOTE(review): a later export cell writes to this same path and file name
# ('../maps/all_streets_diss_v2.geojson'), so this output appears to be
# overwritten on a full run - confirm whether this first export is still needed.
out_data = diss_gdf.to_json(drop_id=True, to_wgs84=True)
output_file_name = 'all_streets_diss_v2.geojson'
write_json(json_data=out_data, output_file_path ='../maps',
               output_file_name = output_file_name)


all_streets_diss_v2


In [34]:
# now... is there a way to make this even smaller? Remove more rows?

In [35]:
diss_gdf.columns

Index(['osntg', 'ss', 'geometry', 'dm', 'nr'], dtype='object')

In [36]:
col_names = ['osntg', 'ss', 'dm', 'nr']

In [37]:
testo = diss_gdf.dissolve(by = col_names[:2], aggfunc =  ['sum'], as_index = False)

In [38]:
testo.head()

Unnamed: 0,osntg,ss,geometry,"(dm, sum)","(nr, sum)"
0,100TH PL NW,0,"LINESTRING (-122.36206 47.7035, -122.36342 47....",0.201049,1
1,100TH PL SW,0,"LINESTRING (-122.3403 47.51334, -122.34036 47....",0.035254,1
2,100TH ST NW_N_NE,1,"MULTILINESTRING ((-122.38387 47.70155, -122.38...",4.535595,9
3,100TH ST NW_N_NE,2,"MULTILINESTRING ((-122.38387 47.70155, -122.38...",0.653439,7
4,100TH ST NW_N_NE,3,"LINESTRING (-122.33512 47.70142, -122.32852 47...",0.307315,1


In [39]:
testo.columns = ['osntg', 'ss', 'geometry', 'dm', 'nr']

In [40]:
diss_gdf['nr'].sum()

9870

In [41]:
testo.shape

(2820, 5)

In [42]:
testo.head()

Unnamed: 0,osntg,ss,geometry,dm,nr
0,100TH PL NW,0,"LINESTRING (-122.36206 47.7035, -122.36342 47....",0.201049,1
1,100TH PL SW,0,"LINESTRING (-122.3403 47.51334, -122.34036 47....",0.035254,1
2,100TH ST NW_N_NE,1,"MULTILINESTRING ((-122.38387 47.70155, -122.38...",4.535595,9
3,100TH ST NW_N_NE,2,"MULTILINESTRING ((-122.38387 47.70155, -122.38...",0.653439,7
4,100TH ST NW_N_NE,3,"LINESTRING (-122.33512 47.70142, -122.32852 47...",0.307315,1


In [43]:
def format_osntg(sn):
    """Format a grouped street name for display.

    A grouped name ends in an underscore-joined list of directions, e.g.
    '100TH ST NW_N_NE'. The text after the last space is treated as that
    direction group and every part is separated with ' | ':
    '100TH ST NW_N_NE' -> '100TH ST | NW | N | NE'.

    Parameters
    ----------
    sn : str
        Street name, with or without an underscore-joined direction group.

    Returns
    -------
    str
        The formatted name. Names without an underscore are returned unchanged.
    """
    if '_' not in sn:
        return sn

    # split on the LAST space; rpartition drops the space itself, which avoids
    # the doubled space that slicing from the space position produced
    osn, sep, ost = sn.rpartition(' ')
    if not sep:
        # no space at all: the whole string is the direction group
        return sn.replace('_', ' | ')

    return osn + ' | ' + ost.replace('_', ' | ')

In [44]:
testo['osntg'] = testo['osntg'].map(format_osntg)

In [45]:
testo.head()

Unnamed: 0,osntg,ss,geometry,dm,nr
0,100TH PL NW,0,"LINESTRING (-122.36206 47.7035, -122.36342 47....",0.201049,1
1,100TH PL SW,0,"LINESTRING (-122.3403 47.51334, -122.34036 47....",0.035254,1
2,100TH ST | NW | N | NE,1,"MULTILINESTRING ((-122.38387 47.70155, -122.38...",4.535595,9
3,100TH ST | NW | N | NE,2,"MULTILINESTRING ((-122.38387 47.70155, -122.38...",0.653439,7
4,100TH ST | NW | N | NE,3,"LINESTRING (-122.33512 47.70142, -122.32852 47...",0.307315,1


In [46]:
out_data = testo.to_json(drop_id=True, to_wgs84=True)
output_file_name = 'all_streets_diss_v2.geojson'
write_json(json_data=out_data, output_file_path ='../maps',
               output_file_name = output_file_name, var_name = 'all_streets_diss')


all_streets_diss


# what street name - excluding direction - has the most street types?

In [None]:
col_names = ['ord_street_name', 'ord_street_type', 'n_segments']

In [None]:
gdf_agg = gdf[col_names].drop_duplicates()

In [None]:
gdf_pv = pd.pivot_table(data = gdf_agg, values = ['n_segments'], index = 'ord_street_name', columns = ['ord_street_type'], aggfunc = 'sum', fill_value = 0, margins = True).reset_index()

In [None]:
gdf_pv.columns = [''.join(cn).replace('n_segments', '') for cn in gdf_pv.columns]

In [None]:
gdf_pv.head()

In [None]:
gdf_pv.to_excel(excel_writer=e_writer, sheet_name = 's_type_count', index = False)

# what street name - excluding type - has the most street connections?

In [None]:
col_names = ['ord_street_name', 'ord_stname_concat', 'street_status_desc', 'n_segments']
gdf_agg = gdf[col_names].drop_duplicates()

In [None]:
gdf_pv = pd.pivot_table(data = gdf_agg, values = ['n_segments'], index = 'ord_street_name', columns = ['street_status_desc'],
                        aggfunc = 'sum', fill_value = 0, margins = True).reset_index()

In [None]:
gdf_pv.head()

In [None]:
gdf_pv.columns = [''.join(cn).replace('n_segments', '') for cn in gdf_pv.columns]

In [None]:
gdf_pv.columns

In [None]:
gdf_pv.head()

In [None]:
gdf_pv.to_excel(excel_writer=e_writer, sheet_name = 'connection_count_by_name', index = False)

# what street name - including direction and type - has the most street connections?

In [None]:
col_names = ['ord_stname_concat', 'ord_street_type', 'street_status', 'street_status_desc', 'n_segments']
gdf_agg = gdf[col_names]

In [None]:
gdf_pv = pd.pivot_table(data = gdf_agg, values = ['n_segments'], index = ['ord_stname_concat', 'ord_street_type'],
                        columns = ['street_status_desc'], aggfunc = 'sum', fill_value = 0, margins = True).reset_index()

In [None]:
gdf_pv.columns = [''.join(cn).replace('n_segments', '') for cn in gdf_pv.columns]

In [None]:
gdf_pv.head()

In [None]:
# remove the all row
gdf_pv = gdf_pv.loc[gdf_pv['ord_stname_concat'] != 'All', :]

In [None]:
gdf_pv.shape

# what are the streets with at least 10 discontinuities?

In [None]:
# the pivot has one column per status label, and the "added" statuses are
# split into several labels that all start with 'added_street'. Sum those
# columns per street so every kind of added segment counts toward the
# threshold; a single 'added_street' column is handled the same way.
added_cols = [cn for cn in gdf_pv.columns if cn.startswith('added_street')]
added_segments_by_street = gdf_pv[added_cols].sum(axis = 1)
gdf_pv.loc[added_segments_by_street >= 10, 'ord_stname_concat'].tolist()

In [None]:
gdf_pv.to_excel(excel_writer=e_writer, sheet_name = 'connection_count_full_name', index = False)

# what is the ratio of discontinuous to continuous streets?

In [None]:
# number of streets with at least one added segment
split_streets = gdf_pv.loc[gdf_pv['discontinuous_street'] > 0, :].shape[0]
split_streets

In [None]:
# number of streets with no added segments
complete_streets = gdf_pv.loc[gdf_pv['continuous_street'] > 0, :].shape[0]
complete_streets

In [None]:
split_streets / gdf_pv.shape[0]

In [None]:
complete_streets / gdf_pv.shape[0]

In [None]:
# 44 percent of streets are multi-segments.
# 55 percent of streets are single segment.

# sum of road miles by discontinuous street

In [None]:
col_names = ['ord_stname_concat', 'ord_street_type', 'street_status', 'street_status_desc', 'dist_miles']
gdf_agg = gdf[col_names]

In [None]:
gdf_pv = pd.pivot_table(data = gdf_agg, values = ['dist_miles'], index = ['ord_stname_concat', 'ord_street_type'],
                        columns = ['street_status_desc'], aggfunc = 'sum', fill_value = 0, margins = True).reset_index()

In [None]:
gdf_pv.columns = [''.join(cn).replace('dist_miles', '') for cn in gdf_pv.columns]

In [None]:
gdf_pv.shape

In [None]:
gdf_pv = gdf_pv.loc[gdf_pv['ord_stname_concat']!= 'All', :].copy()

In [None]:
gdf_pv.shape

In [None]:
gdf_pv.head()

# which streets feature longer discontinuous portions?

In [None]:
# classify each street by comparing its added miles with its existing miles
#   0: default - the street has continuous mileage (street is complete)
#   1: no continuous mileage and added miles <= discontinuous miles
#   2: no continuous mileage and added miles >  discontinuous miles
# the pivot carries one column per status label, so total every column whose
# name starts with 'added_street' into a single per-street series first
added_cols = [cn for cn in gdf_pv.columns if cn.startswith('added_street')]
added_miles = gdf_pv[added_cols].sum(axis = 1)
is_split = gdf_pv['continuous_street'] == 0

gdf_pv['longer_added'] = int(0) # street is complete
gdf_pv.loc[is_split & (added_miles <= gdf_pv['discontinuous_street']), 'longer_added'] = int(1)
gdf_pv.loc[is_split & (added_miles > gdf_pv['discontinuous_street']), 'longer_added'] = int(2)

In [None]:
gdf_pv['longer_added'].value_counts()

In [None]:
gdf_pv.to_excel(excel_writer=e_writer, sheet_name = 'distance_full_name', index = False)

In [None]:
# road miles on streets with at least one added segment
split_streets = gdf_pv['discontinuous_street'].sum()
split_streets

In [None]:
# road miles on streets with no added segments
complete_streets = gdf_pv['continuous_street'].sum()
complete_streets

In [None]:
split_streets / (split_streets + complete_streets)

In [None]:
complete_streets / (split_streets + complete_streets)

In [None]:
# 78 percent of road miles are multi-segments streets
# 22 percent of road miles are single segment streets

# tabulate distances and distributions by street type

In [None]:
st_type_df = pd.pivot_table(data = gdf, index = ['ord_street_type'],
                         columns = ['street_status_desc'], values = ['dist_miles'],
                        aggfunc='sum', fill_value = 0).reset_index()



In [None]:
st_type_df.columns = [''.join(cn).replace('dist_miles', '') for cn in st_type_df.columns]

In [None]:
st_type_df.shape

In [None]:
st_type_df.head()

In [None]:
# now, melt
st_type_df_melt = pd.melt(frame = st_type_df, id_vars = ['ord_street_type'],
                          var_name = 'street_status_desc',
                          value_name = 'total_miles')

In [None]:
st_type_df_melt.head()

In [None]:
col_names = ['ord_street_type', 'street_status_desc', 'dist_miles']

In [None]:
ds_st_df = gdf[col_names].groupby(col_names[:-1]).describe().reset_index()

In [None]:
ds_st_df.head()

In [None]:
ds_st_df.columns =  [''.join(cn).replace('dist_miles', '') for cn in ds_st_df.columns]

In [None]:
ds_st_df.head()

In [None]:
# keep the summary rows for every "added" status. The labels carry a suffix
# after 'added_street', so match on the prefix; an exact comparison with
# 'added_street' selects no rows and would export an empty sheet.
is_added = ds_st_df['street_status_desc'].str.startswith('added_street')
temp_ds_st_df = ds_st_df.loc[is_added, :].copy()

In [None]:
temp_ds_st_df = temp_ds_st_df.drop(labels = 'std', axis = 1)

In [None]:
temp_ds_st_df = temp_ds_st_df.sort_values(by = ['count'], ascending = False)

In [None]:
temp_ds_st_df.to_excel(excel_writer=e_writer, sheet_name = 'added_street_count', index = False)

In [None]:
st_type_df_melt.head()

In [None]:
ds_st_df.head()

In [None]:
# attach the per-group summary statistics to the total miles
# the join keys are the two columns the frames have in common
join_keys = ['ord_street_type', 'street_status_desc']
ds_st_df = st_type_df_melt.merge(right = ds_st_df, how = 'left', on = join_keys)

In [None]:
ds_st_df.head()

In [None]:
ds_st_df.columns

In [None]:
# groups without a match in the left join have no statistics: zero-fill
# every float64 column in one step
float_cols = [cn for cn in ds_st_df.columns if ds_st_df[cn].dtype == 'float64']
ds_st_df[float_cols] = ds_st_df[float_cols].fillna(0)

In [None]:
ds_st_df = ds_st_df.sort_values(by = ['ord_street_type', 'street_status_desc'])

In [None]:
ds_st_df['street_status_desc'] = ds_st_df['street_status_desc'].str.replace('_street', '')

In [None]:
# set index

In [None]:
ds_st_df = ds_st_df.set_index(keys = ['ord_street_type', 'street_status_desc'])

In [None]:
# reorder columns
col_names = ['count', 'min', '25%', '50%', 'mean', '75%', 'max', 'total_miles']
ds_st_df = ds_st_df[col_names]

In [None]:
ds_st_df.to_excel(excel_writer=e_writer, sheet_name = 'sum_stats', index = True)
                  

In [None]:
e_writer.close()

In [None]:
ds_st_df.head()

# 10 longest added streets

In [None]:
# street_status 2 is the 'added_street: intra-sector' code in
# street_status_dict; inter-sector additions (code 3) are not included here.
# .copy() so new columns can be added without touching gdf.
wms_gdf = gdf.loc[gdf['street_status'] == 2, :].copy()

In [None]:
wms_gdf['dist_rank'] = wms_gdf['dist'].rank(method = 'dense', ascending = False)

In [None]:
wms_gdf.loc[wms_gdf['dist_rank'] <= 10, 'ord_stname_concat'].tolist()

# histogram of all added streets

In [None]:
# log transform the distance in miles to prep for plotting
wms_gdf['dist_miles_log'] = np.log10(wms_gdf['dist_miles'])

In [None]:
wms_gdf['dist_miles_log'].describe()

In [None]:
# compute summary stats and create a dataframe
# the ds_df will hold values of interest to plot and plotting keywords for 
# those values
ds_df = wms_gdf['dist_miles'].describe().to_frame().reset_index(names = ['stat'])

In [None]:
# remove the count and the standard deviation - not useful for plotting
# ~ is the logical-not operator for boolean masks; unary minus is an
# arithmetic operator that NumPy rejects on boolean arrays
ds_df = ds_df.loc[~ds_df['stat'].isin(['count', 'std']), :]

In [None]:
# add the 10th, 95th, and 99th percentiles
extra_quantiles = {'10%': 0.10, '95%': 0.95, '99%': 0.99}
temp_stat_records = [
    [stat_label, np.quantile(a = wms_gdf['dist_miles'], q = stat_q)]
    for stat_label, stat_q in extra_quantiles.items()
]
temp_stat_df = pd.DataFrame(data = temp_stat_records, columns = ds_df.columns)
temp_stat_df.head()

In [None]:
# combine
ds_df = pd.concat(objs = [ds_df, temp_stat_df], axis = 0)

In [None]:
ds_df = ds_df.sort_values(by = ['dist_miles'])

In [None]:
# log10 of each statistic, computed on the whole column at once
ds_df['dist_miles_log'] = np.log10(ds_df['dist_miles'])

In [None]:
# the color of the lines to add
ds_df['color_list'] = 'black'

In [None]:
# convert to feet to help with labeling
ds_df['dist_feet'] = ds_df['dist_miles'] * 5280

In [None]:
# this is the horizontal alignment of each piece of added text
# one entry per row of ds_df, in its current order (sorted by dist_miles in an
# earlier cell); the list length must equal the number of rows
ds_df['text_ha'] = ['left', 'right',  'center', 'right', 'right', 'left', 'center', 'center', 'right']

In [None]:
# the text labels to add
# one label per row of ds_df, in ascending dist_miles order. The numbers are
# typed in by hand rather than derived from ds_df.
# NOTE(review): these values will not follow the data if the input changes -
# re-check them against ds_df['dist_miles'] / ds_df['dist_feet'] after a re-run.
ds_df['text'] = ['4ft (min.)', '99 ft (10%)', '172 ft (25%)', '443 ft (med.)', '0.23 mi (avg.)', '0.25 mi (75%)', '0.96 mi (95%)','1.8 mi (99%)', '5 mi (max.)']

In [None]:
# jitter each piece of text by a small amount left or right
ds_df['text_jitter'] = [0, .001, 0, .09, 0, 0, -.05, 0, 0]

In [None]:
# the horizontal position is created from the distance in miles and the text jitter
ds_df['text_x_pos'] = ds_df['dist_miles_log'] + ds_df['text_jitter']

In [None]:
ds_df.head(n=10)

In [None]:
wms_gdf.head()

In [None]:
# compute the histogram of the added distribution
# x is log10(miles); tick labels are written in feet / miles for readability
sns.set_theme(style = "whitegrid")
f, ax = plt.subplots(figsize = (20, 5))

# y runs past the tallest tick (175) to leave a band for the text labels
ax.set_ylim(0, 185.6)
ax.set_xlim(-3.2, .75)

# draw on the axes created above rather than on the implicit current axes
my_plot = sns.histplot(data = wms_gdf, x = 'dist_miles_log',
                      color='#ca0020', bins = 100, ax = ax)

# set the top part of the plot frame - the spine - to black
ax.spines['top'].set_edgecolor(color = 'black')

y_ticks = list(range(0, 176, 25))
y_tick_labels_formatted = ['{:,}'.format(ytl) for ytl in y_ticks]

my_plot.set_yticks(ticks = y_ticks)
my_plot.set_yticklabels(labels = y_tick_labels_formatted, rotation=0)

# total added streets
n_added = wms_gdf.shape[0]
n_added =  f"{n_added:,}"

tot_miles = wms_gdf['dist_miles'].sum()
tot_miles = f"{int(round(tot_miles, 0)):,}"

my_title = "Histogram of added road segment length, all road types (n = {} | {} miles)".format(n_added, tot_miles)

plt.title(label = my_title, fontsize = 16)
plt.xlabel(xlabel = 'Added road segment length (log-scale)')
# spelling of "missing" corrected in the axis label
plt.ylabel(ylabel = "Number of missing road segments")

# x tick positions: distances in miles, converted to the log10 plot scale
x_tick_labels = [50 / 5280, 100 / 5280, 300 / 5280, .1, .25, .5, 1, 2, 3]
x_ticks = [np.log10(xtl) for xtl in x_tick_labels]

# x tick labels
x_tick_label_text = ['50 ft', '100 ft', '300 ft',  '1/10 mi', '1/4 mi', '1/2 mi', '1 mi', '2 mi', '3 mi']

my_plot.set_xticks(ticks = x_ticks)
my_plot.set_xticklabels(labels = x_tick_label_text, rotation=0)

# white out the top portion of the ticks
for xt in x_ticks:
    my_plot.vlines(x = xt, ymin = 175, ymax = 185, color = 'white', linestyles = 'solid', linewidth = 2)

# add vertical lines at some descriptive statistics points
my_plot.vlines(x = ds_df['dist_miles_log'], ymin = 0, ymax = 175, color = ds_df['color_list'], linestyles = 'dashdot')

# add text for the descriptive stats, just above the dashed lines
for ir, row in ds_df.iterrows():
    my_plot.text(x = row['text_x_pos'], y = 176, s = row['text'], horizontalalignment = row['text_ha'], color = '#ca0020')

# build the path with os.path.join so it resolves on every platform; a
# literal backslash path only works on Windows because os.path.normpath does
# not translate backslashes elsewhere
output_file_name = os.path.join('..', 'graphics', 'histogram_ALL_streets.png')
print(output_file_name)
my_plot.get_figure().savefig(fname = output_file_name)
plt.show()

In [None]:
# and there you have it... most added streets are short. 
# 75% of all added streets are 0.25 miles or less. 

# histogram of each type of street

In [None]:
st_type_df = pd.pivot_table(data = wms_gdf, index = ['ord_street_type'],
                         columns = ['street_status_desc'], values = ['dist_miles'],
                        aggfunc='sum', fill_value = 0).reset_index()

In [None]:
st_type_df.head()

In [None]:
st_type_df.columns = [''.join(cn).replace('dist_miles', '') for cn in st_type_df.columns]

In [None]:
st_type_df.head()

In [None]:
st_type_df.columns = ['ord_street_type', 'total_miles']

In [None]:
ds_df = wms_gdf[['ord_street_type', 'dist_miles']].groupby(['ord_street_type'])['dist_miles'].describe().reset_index(names = ['ord_street_type'])

In [None]:
ds_df.head()

In [None]:
ds_df['max'].tolist()

In [None]:
ds_df.shape

In [None]:
# one histogram per street type; each row of ds_df holds the describe()
# statistics of dist_miles for a single ord_street_type
for ir, row in ds_df.iterrows():
    # let's make a graphic showing the distances
    sns.set_theme(style = "whitegrid")
    f, ax = plt.subplots(figsize = (20, 5))

    # set the top part of the plot frame - the spine - to black
    ax.spines['top'].set_edgecolor(color = 'black')

    x_max = row['max']
    ost = row['ord_street_type']

    # street types with a segment longer than one mile get a wider x range
    # (x is log10(miles), so 0 corresponds to one mile)
    has_long_segments = x_max > 1
    xlim_max = .75 if has_long_segments else 0
    ax.set_xlim(-3.2, xlim_max)

    temp_gdf = wms_gdf.loc[wms_gdf['ord_street_type'] == ost, :]
    # draw on this iteration's axes rather than the implicit current axes
    my_plot = sns.histplot(data = temp_gdf, x = 'dist_miles_log',
                          color='#ca0020', bins = 100, ax = ax)

    # total added streets
    n_added = f"{temp_gdf.shape[0]:,}"

    # pick a readable unit / precision for the total length in the title
    tot_miles = temp_gdf['dist_miles'].sum()
    dist_units = 'miles'
    if tot_miles < .25:
        # assumes the 'dist' column is in feet - TODO confirm
        tot_dist = int(round(temp_gdf['dist'].sum(), 0))
        dist_units = 'feet'
    elif tot_miles <= 5:
        tot_dist = round(tot_miles, 2)
    else:
        tot_dist = int(round(tot_miles, 0))

    my_title =   "Histogram of added road segment length: {} (n = {} | {} {})".format(ost, n_added, tot_dist, dist_units )

    plt.title(label = my_title, fontsize = 16)
    # labels match the all-streets histogram: single spaces, "missing" spelled correctly
    plt.xlabel(xlabel = 'Added road segment length (log-scale)')
    plt.ylabel(ylabel = "Number of missing road segments")

    # tick positions in miles; the 2 mi and 3 mi ticks only fit the wider range
    x_tick_labels = [50 / 5280, 100 / 5280, 300 / 5280, .1, .25, .5, 1]
    x_tick_label_text = ['50 ft', '100 ft', '300 ft',  '1/10 mi', '1/4 mi', '1/2 mi', '1 mi']
    if has_long_segments:
        x_tick_labels = x_tick_labels + [2, 3]
        x_tick_label_text = x_tick_label_text + ['2 mi', '3 mi']

    x_ticks = [np.log10(xtl) for xtl in x_tick_labels]

    my_plot.set_xticks(ticks = x_ticks)
    my_plot.set_xticklabels(labels = x_tick_label_text, rotation=0)

    # os.path.join keeps the path valid on every platform; a literal
    # backslash path only resolves on Windows
    output_file_name = os.path.join('..', 'graphics', 'histogram_{}.png'.format(ost))
    print(output_file_name)
    my_plot.get_figure().savefig(fname = output_file_name)
    # close this figure explicitly so figures do not accumulate in memory
    plt.close(f)

In [None]:
ds_st_df.head()

In [None]:
col_names = ['ord_street_type', 'street_status_desc', 'dist_miles']
ds_st_df = gdf[col_names].groupby(col_names[:-1]).agg(n_segments = ('dist_miles', 'size'),
                                                      n_miles = ('dist_miles', 'sum')).reset_index()

In [None]:
ds_st_df.head()

In [None]:
col_names = ['street_status_desc', 'dist_miles']
all_ds_st_df = gdf[col_names].groupby(col_names[:-1]).agg(n_segments = ('dist_miles', 'size'),
                                                      n_miles = ('dist_miles', 'sum')).reset_index()

In [None]:
all_ds_st_df['ord_street_type'] = 'ALL'

In [None]:
col_names = ['ord_street_type', 'street_status_desc', 'n_segments', 'n_miles']
all_ds_st_df = all_ds_st_df[col_names]

In [None]:
ds_st_df = pd.concat(objs = [all_ds_st_df, ds_st_df])

In [None]:
ds_st_df.head()

In [None]:
# np.log is the natural logarithm (the histograms above use np.log10)
# NOTE(review): n_miles_log is not referenced by the plotting cells below -
# confirm whether it is still needed.
ds_st_df['n_miles_log'] = np.log(ds_st_df['n_miles'])
# swap underscores for spaces so the labels read well in the plot legend
ds_st_df['street_status_desc'] = ds_st_df['street_status_desc'].str.replace('_', ' ')

In [None]:
# grouped bar chart: total miles per road type, one bar per road status
sns.set_theme(style = "whitegrid")
f, ax = plt.subplots(figsize = (20, 5))
ax.set_ylim(0, 1600)

# these are the same green, black, and red colors used in qGIS.
# seaborn requires a palette dict to contain every hue level, so both "added"
# labels (as they read after underscores are replaced with spaces) are listed
# next to the original generic 'added street' key.
# NOTE(review): the inter-sector colour is a lighter red picked for this
# chart - confirm it against the QGIS styling.
my_color_palette = {'continuous street': '#33a02c',
                    'discontinuous street': '#000000',
                    'added street': '#ca0020',
                    'added street: intra-sector': '#ca0020',
                    'added street: inter-sector': '#f4a582'}

# draw on the axes created above rather than the implicit current axes
my_plot = sns.barplot(data = ds_st_df, x = 'ord_street_type', y = 'n_miles',
                      hue = 'street_status_desc', palette=my_color_palette, ax = ax)

# set the top part of the plot frame - the spine - to black
ax.spines['top'].set_edgecolor(color = 'black')

y_ticks = list(range(0, 1601, 200))
y_tick_labels_formatted = ['{:,}'.format(ytl) for ytl in y_ticks]

my_plot.set_yticks(ticks = y_ticks)
my_plot.set_yticklabels(labels = y_tick_labels_formatted, rotation=0)

my_title = "Total miles by road types and road status"

plt.title(label = my_title, fontsize = 16)
plt.xlabel(xlabel = 'Road type')
plt.ylabel(ylabel = "Miles")

plt.legend(title='Road status')

# os.path.join keeps the path valid on every platform; a literal backslash
# path only resolves on Windows
output_file_name = os.path.join('..', 'graphics', 'barplot_miles.png')
print(output_file_name)
my_plot.get_figure().savefig(fname = output_file_name)
plt.show()

In [None]:
# grouped bar chart: number of segments per road type, one bar per road status
# my_color_palette is the status -> colour dict defined in the miles bar chart cell
sns.set_theme(style = "whitegrid")
f, ax = plt.subplots(figsize = (20, 5))
ax.set_ylim(0, 22000)

# draw on the axes created above rather than the implicit current axes
my_plot = sns.barplot(data = ds_st_df, x = 'ord_street_type', y = 'n_segments',
                      hue = 'street_status_desc', palette=my_color_palette, ax = ax)

# set the top part of the plot frame - the spine - to black
ax.spines['top'].set_edgecolor(color = 'black')

y_ticks = list(range(0, 22001, 2000))
y_tick_labels_formatted = ['{:,}'.format(ytl) for ytl in y_ticks]

my_plot.set_yticks(ticks = y_ticks)
my_plot.set_yticklabels(labels = y_tick_labels_formatted, rotation=0)

my_title = "Total segments by road types and road status"

plt.title(label = my_title, fontsize = 16)
plt.xlabel(xlabel = 'Road type')
plt.ylabel(ylabel = "Number of segments")

plt.legend(title='Road status')

# os.path.join keeps the path valid on every platform; a literal backslash
# path only resolves on Windows
output_file_name = os.path.join('..', 'graphics', 'barplot_segment_count.png')
print(output_file_name)
my_plot.get_figure().savefig(fname = output_file_name)
plt.show()