In [1]:
# mike babb
# 2024 06 28
# what streets start and stop?

In [2]:
# standard
import os

In [3]:
# external
from itertools import combinations, product
import geopandas as gpd
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from shapely.geometry import LineString, Point
from shapely import line_merge

In [4]:
# custom
from geodataio.geo_operations import points2distance, calculate_initial_compass_bearing
from utils import *

# load the street network

In [5]:
# file path
# directory that holds the input data. The default is the original local
# drive path; set the SEATTLE_STREETS_DATA_DIR environment variable to point
# the notebook at another location without editing this cell.
input_file_path = os.environ.get('SEATTLE_STREETS_DATA_DIR',
                                 'H:/project/seattle_streets/data/')

In [6]:
# name of the input file, joined onto input_file_path in the next cell
file_name = 'missing_segments.gpkg'

In [7]:
# full path to the input file: directory + file name
fpn = os.path.join(input_file_path, file_name)

In [8]:
# read the file at fpn with geopandas; the result is used as gdf below
gdf = gpd.read_file(filename = fpn)

In [9]:
# list the columns available in gdf
gdf.columns

Index(['f_intr_id', 't_intr_id', 'snd_id', 'snd_feacode', 'citycode',
       'stname_id', 'st_code', 'arterial_code', 'segment_type', 'agency_code',
       'access_code', 'divided_code', 'structure_type', 'legalloc_code',
       'vehicle_use_code', 'gis_seg_length', 'l_adrs_from', 'l_adrs_to',
       'r_adrs_from', 'r_adrs_to', 'ord_pre_dir', 'ord_street_name',
       'ord_street_type', 'ord_suf_dir', 'ord_stname_concat', 'l_city',
       'l_state', 'l_zip', 'r_city', 'r_state', 'r_zip', 'sndseg_update',
       'compkey', 'comptype', 'unitid', 'unitid2', 'city_portion', 'geometry'],
      dtype='object')

# WHAT STREET NAME HAS THE MOST STREET TYPES?

In [56]:
# what street has the most number of types?
# keep one row per (street name, full street name) pair so that each distinct
# full name - e.g. '34TH AVE S' vs. '34TH PL SW' - is counted once.
# the grouping cell further down groups by col_names[:-1], which is
# 'ord_street_name' alone, and counts the 'ord_stname_concat' rows.
col_names = ['ord_street_name', 'ord_stname_concat']
gdf_agg = gdf[col_names].drop_duplicates()

In [58]:
# distinct full street names that share the street name '34TH'
is_34th = gdf_agg['ord_street_name'] == '34TH'
gdf_agg.loc[is_34th, 'ord_stname_concat'].unique()

array(['34TH AVE S', '34TH AVE', 'N 34TH ST', '34TH AVE NW',
       '34TH AVE SW', '34TH PL SW', '34TH AVE E', '34TH AVE W',
       '34TH AVE NE', '34TH CT S', '34TH PL S', '34TH CT W'], dtype=object)

In [52]:
# count the rows (one per distinct full street name) for each street name
name_groups = gdf_agg.groupby(col_names[:-1])
gdf_agg = name_groups.agg(n_streets = ('ord_stname_concat', 'size')).reset_index()

In [53]:
# summary statistics of the number of full street names per street name
gdf_agg['n_streets'].describe()

count    1223.000000
mean        2.041701
std         2.030228
min         1.000000
25%         1.000000
50%         1.000000
75%         2.000000
max        12.000000
Name: n_streets, dtype: float64

In [49]:
# order the street names from the most full street names to the fewest
gdf_agg = gdf_agg.sort_values('n_streets', ascending = False)

In [50]:
# first rows of gdf_agg - after the sort above, the largest n_streets values
gdf_agg.head()

Unnamed: 0,ord_street_name,n_streets
81,34TH,12
84,37TH,12
76,31ST,11
79,33RD,11
82,35TH,11


In [None]:
# NOTE(review): ms_gdf is not created in any cell visible above this one -
# presumably it comes from an earlier session or a removed cell. Confirm where
# it is defined before relying on Restart & Run All.
ms_gdf.shape

In [None]:
# preview the first rows of ms_gdf
ms_gdf.head()

In [None]:
# group by and describe
# start from an independent copy of the rows where same_snd_group is 0
not_same_group = ms_gdf['same_snd_group'] == 0
wms_gdf = ms_gdf[not_same_group].copy()

In [None]:
# summary statistics of dist_miles, computed separately for each street type
wms_gdf.groupby('ord_street_type')['dist_miles'].describe()

In [None]:
# COUNT UNIQUE STREET GROUPS

In [None]:
# one row per (full street name, street type, snd_group); the number of rows
# per street is then the number of distinct snd_group values it has
col_names = ['ord_stname_concat', 'ord_street_type', 'snd_group']
unique_groups = gdf[col_names].drop_duplicates()
gdf_agg = (
    unique_groups
    .groupby(col_names[:-1])
    .agg(snd_group_count = ('snd_group', 'size'))
    .reset_index()
)

gdf_agg.shape

gdf_agg.head()

gdf_agg['snd_group_count'].describe()

# number of streets with at least two segments
split_streets = int((gdf_agg['snd_group_count'] > 1).sum())
split_streets

# share of all streets, as a fraction between 0 and 1
split_streets / len(gdf_agg)

# number of streets with exactly one segment
complete_streets = int((gdf_agg['snd_group_count'] == 1).sum())
complete_streets

# share of all streets, as a fraction between 0 and 1
complete_streets / len(gdf_agg)

# 55 percent of streets are single segment
# 45 percent of streets are multi-segments.
# TODO: figure out how much of this is road miles.

# LET'S MAKE A GRAPHIC SHOWING THE DISTRIBUTION OF ADDED STREETS

In [None]:
# find the longest
# first, count the rows for each value of same_snd_group
ms_gdf['same_snd_group'].value_counts()

In [None]:
# shape of the array of distinct full street names; its single entry is their count
gdf['ord_stname_concat'].unique().shape

# LONGEST MISSING STREETS

In [None]:
# dense rank of every row by 'dist', descending: rank 1 is the largest value
# and tied values share a rank
dist_values = wms_gdf['dist']
wms_gdf['dist_rank'] = dist_values.rank(method = 'dense', ascending = False)

In [None]:
# full street names of the rows whose dense distance rank is 10 or better
in_top_ten = wms_gdf['dist_rank'] <= 10
wms_gdf.loc[in_top_ten, 'ord_stname_concat'].tolist()

In [None]:
# base-10 logarithm of dist_miles, used as the x variable of the plots below
# NOTE(review): a dist_miles of 0 would become -inf here - confirm none exist.
wms_gdf['dist_miles_log'] = np.log10(wms_gdf['dist_miles'])

In [None]:
# summary statistics of the log10 distances
wms_gdf['dist_miles_log'].describe()

In [None]:
# candidate tick positions in feet: a few short distances, then quarter-mile
# steps up to one mile, then whole miles up to five miles.
feet_per_mile = 5280

my_start_list = [100, 250, 500]

# 1/4, 1/2, 3/4 and 1 mile
my_start_list.extend(feet_per_mile * ii // 4 for ii in range(1, 5))

# 2 through 5 miles. One mile is already in the list from the quarter-mile
# steps, so start at two to avoid a duplicate 5280 entry.
my_start_list.extend(feet_per_mile * jj for jj in range(2, 6))

# independent copy, so later edits to one list do not change the other
x_tick_list = list(my_start_list)

In [None]:
# show the tick positions built in the previous cell
x_tick_list

In [None]:
# let's make a graphic showing the distances
# histogram of the log10 distances; the x ticks are placed at the log10 of
# familiar distances and labeled in feet (F) and miles (M).
sns.set_theme(style = "whitegrid")
f, ax = plt.subplots(figsize = (20, 5))

# draw on ax explicitly rather than relying on pyplot's current axes
my_plot = sns.histplot(data = wms_gdf, x = 'dist_miles_log',
                       color = 'darkgreen', bins = 100, ax = ax)

y_ticks = list(range(0, 176, 25))
y_tick_labels_formatted = ['{:,}'.format(ytl) for ytl in y_ticks]

my_plot.set_yticks(ticks = y_ticks)
my_plot.set_yticklabels(labels = y_tick_labels_formatted, rotation = 0)

my_plot.set_title(label = "Histogram of missing segment length")
my_plot.set_xlabel(xlabel = 'Missing segment length')
my_plot.set_ylabel(ylabel = "Count")

# tick positions expressed in miles (the first four are feet / 5280), then
# moved onto the log10 scale of the plotted variable
x_tick_labels = [50 / 5280, 100 / 5280, 250 / 5280, 500 / 5280, .25, .5, .75, 1, 1.5, 2, 3]
x_ticks = [np.log10(xtl) for xtl in x_tick_labels]

x_tick_label_text = ['50 F', '100 F', '250 F',  '500 F', '1/4 M', '1/2 M', '3/4 M', '1 M', '1.5 M', '2 M', '3 M']

my_plot.set_xticks(ticks = x_ticks)
my_plot.set_xticklabels(labels = x_tick_label_text, rotation = 0)

# build the path from its parts: a literal '..\\assets\\...' is only a valid
# relative path on Windows, and os.path.normpath does not convert backslashes
# on other platforms.
output_file_name = os.path.join('..', 'assets', 'dist_histogram.png')
print(output_file_name)
f.savefig(fname = output_file_name)

In [None]:
# restrict the comparison to rows whose street type is 'AVE' or 'ST'
ave_or_st = wms_gdf['ord_street_type'].isin(['AVE', 'ST'])
temp_wms_gdf = wms_gdf.loc[ave_or_st, :]

In [None]:
# density of the log10 distances, one filled curve per street type
sns.set_theme(style = "whitegrid")
f, ax = plt.subplots(figsize = (20, 5))

kde_options = {'x': 'dist_miles_log', 'hue': 'ord_street_type', 'fill': True}
my_plot = sns.kdeplot(data = temp_wms_gdf, **kde_options)

In [None]:
# output path used by the to_file export two cells below
# NOTE(review): this reuses file_name, which earlier held the input file name;
# re-running the input-path cell after this one would pick up this value.
file_name = '../testo.geojson'

In [None]:
# rows whose full street name contains 'GALER', as an independent copy.
# str.find returns the match position, which is 0 when the name starts with
# 'GALER', so testing `find(...) > 0` silently dropped such names. str.contains
# tests for presence anywhere; regex = False treats 'GALER' as a literal, and
# na = False leaves rows with a missing name out, as the comparison did before.
has_galer = temp_wms_gdf['ord_stname_concat'].str.contains('GALER', regex = False, na = False)
tt = temp_wms_gdf.loc[has_galer, :].copy()

In [None]:
# write the selected rows to file_name as GeoJSON.
# the driver is spelled 'GeoJSON', its registered name: the lower-case
# 'geojson' is not guaranteed to be recognized by every I/O engine.
tt.to_file(filename = file_name, driver = 'GeoJSON', index = False)

In [None]:
# label and save the density plot held in my_plot.
# This cell was copied from the histogram cell, so it has been adapted:
#   - the count-style y ticks (0 to 175) are not applied; forcing them onto a
#     density axis stretches the axis far beyond the curves.
#   - title and y label describe a density plot rather than a histogram.
#   - labels are set through the Axes object. plt.title / plt.xlabel / plt.ylabel
#     act on pyplot's current figure, which need not be the figure that was
#     created in an earlier cell.
#   - the figure is saved under its own name instead of overwriting
#     dist_histogram.png.
my_plot.set_title(label = "Density of missing segment length by street type")
my_plot.set_xlabel(xlabel = 'Missing segment length')
my_plot.set_ylabel(ylabel = "Density")

# tick positions expressed in miles (the first four are feet / 5280), then
# moved onto the log10 scale of the plotted variable
x_tick_labels = [50 / 5280, 100 / 5280, 250 / 5280, 500 / 5280, .25, .5, .75, 1, 1.5, 2, 3]
x_ticks = [np.log10(xtl) for xtl in x_tick_labels]

x_tick_label_text = ['50 F', '100 F', '250 F',  '500 F', '1/4 M', '1/2 M', '3/4 M', '1 M', '1.5 M', '2 M', '3 M']

my_plot.set_xticks(ticks = x_ticks)
my_plot.set_xticklabels(labels = x_tick_label_text, rotation = 0)

# build the path from its parts so the separator is right on every platform
output_file_name = os.path.join('..', 'assets', 'dist_kde_by_street_type.png')
print(output_file_name)
my_plot.get_figure().savefig(fname = output_file_name)