In [1]:
# standard
import os

In [2]:
# external
import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd

In [3]:
# custom
from geo_data_io.fc_df_spatial import write_gdf
import run_constants as rc
from utils import create_city_sector, create_name


In [4]:
# custom
import run_constants as rc
from utils import create_name, generate_street_end_vertices
from geo_data_io.fc_df_spatial import keep_largest_geometry, write_gdf

In [5]:
# full path of the S02 street-network input inside the run's output folder
fpn = os.path.join(
    rc.OUTPUT_FILE_PATH,
    rc.S02_SND_IN_FILE_NAME,
)

In [6]:
# read the layer at `fpn` into a GeoDataFrame
gdf = gpd.read_file(fpn)

In [7]:
gdf.dtypes

f_intr_id                          int64
t_intr_id                          int64
snd_id                             int64
snd_feacode                        int64
citycode                           int64
stname_id                          int64
st_code                            int64
arterial_code                      int64
segment_type                       int64
agency_code                        int64
access_code                        int64
divided_code                       int64
structure_type                     int64
legalloc_code                      int64
vehicle_use_code                   int64
gis_seg_length                   float64
l_adrs_from                        int64
l_adrs_to                          int64
r_adrs_from                        int64
r_adrs_to                          int64
ord_pre_dir                       object
ord_street_name                   object
ord_street_type                   object
ord_suf_dir                       object
ord_stname_conca

In [8]:
# create a graph from the segment table: one edge per (f_intr_id, t_intr_id) pair

In [9]:
# one edge per row, keyed by the from/to intersection ids;
# edge_attr=True attaches every remaining column to the edge as attributes
sg = nx.from_pandas_edgelist(
    df=gdf,
    source='f_intr_id',
    target='t_intr_id',
    edge_attr=True,
)

In [11]:
sg.number_of_nodes()

23251

In [16]:
# spot check: is there an edge between these two node ids?
sg.has_edge(9083, 3024)

False

# load the final output data (the missing-segments layer to compare against the graph)

In [17]:
# full path of the S05 missing-segments input inside the run's output folder
fpn = os.path.join(
    rc.OUTPUT_FILE_PATH,
    rc.S05_MISSING_IN_FILE_NAME,
)

In [18]:
# NOTE: rebinds `gdf`; the graph `sg` was already built from the previous layer
gdf = gpd.read_file(fpn)

In [19]:
gdf.dtypes

snd_id                      int64
ord_street_name            object
ord_street_type            object
ord_stname_type            object
ord_stname_concat          object
ord_stname_unique          object
ord_stname_type_group      object
ord_street_type_rank       object
snd_group                   int64
group_id                    int64
city_sector                object
city_sector_group          object
street_status               int64
sn_id                       int64
en_id                       int64
dist                      float64
dist_miles                float64
geometry                 geometry
dtype: object

In [20]:
# now compare the two datasets: for each row, check whether its (sn_id, en_id) node pair is already an edge in the graph

In [22]:
def find_existing_segments(row, graph=None):
    """Flag whether a row's node pair is already connected in a graph.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``'sn_id'`` and ``'en_id'`` (for example a
        DataFrame row passed by ``DataFrame.apply(..., axis=1)``).
    graph : object with ``has_edge(u, v)``, optional
        Graph to query. When omitted, the notebook-level graph ``sg`` is
        used, so existing ``gdf.apply(find_existing_segments, axis=1)``
        calls keep working unchanged.

    Returns
    -------
    int
        1 if ``graph`` has an edge between the two node ids, otherwise 0.
    """
    if graph is None:
        # fall back to the notebook global; resolved at call time
        graph = sg
    return int(graph.has_edge(u = row['sn_id'], v = row['en_id']))

In [23]:
# row-wise lookup: 1 when the row's node pair is an edge in `sg`, else 0
gdf['existing_segment'] = gdf.apply(
    find_existing_segments,
    axis='columns',
)

In [24]:
gdf['existing_segment'].sum()

np.int64(26577)

In [25]:
gdf.shape

(29776, 19)

In [26]:
# constant counter column so segments can be summed in the pivot
gdf['n_segments'] = 1

In [35]:
# Cross-tabulate segment counts: rows are street_status, columns are the
# existing_segment flag (0 / 1), with 'All' margins for row and column totals.
pv_df = (
    pd.pivot_table(
        data=gdf,
        values='n_segments',
        index='street_status',
        columns='existing_segment',
        aggfunc='sum',
        fill_value=0,
        margins=True,
    )
    # guarantee all three columns exist even if one flag value never occurs
    .reindex(columns=[0, 1, 'All'], fill_value=0)
    # label by key rather than by position so columns cannot be mislabeled
    .rename(columns={0: 'not_existing', 1: 'is_existing', 'All': 'total'})
    .rename_axis(columns=None)
    .reset_index()
)

In [36]:
pv_df

Unnamed: 0,street_status,not_existing,is_existing,total
0,0,0,3812,3812
1,1,0,21938,21938
2,2,2806,810,3616
3,3,393,17,410
4,All,3199,26577,29776


In [33]:
# share of street_status 2 segments that already exist (is_existing / total from pv_df)
810 / 3616

0.22400442477876106

In [34]:
# share of street_status 3 segments that already exist (is_existing / total from pv_df)
17 / 410

0.041463414634146344

In [None]:
# Share of segments per street_status row (and overall) that already exist
# as an edge in the graph. Display only; pv_df itself is not modified.
pv_df.assign(existing_share = pv_df['is_existing'] / pv_df['total'])

In [37]:
gdf.dtypes

snd_id                      int64
ord_street_name            object
ord_street_type            object
ord_stname_type            object
ord_stname_concat          object
ord_stname_unique          object
ord_stname_type_group      object
ord_street_type_rank       object
snd_group                   int64
group_id                    int64
city_sector                object
city_sector_group          object
street_status               int64
sn_id                       int64
en_id                       int64
dist                      float64
dist_miles                float64
geometry                 geometry
existing_segment            int64
n_segments                  int64
dtype: object

In [39]:
# combined label "<street_status>_<existing_segment>", e.g. "2_1"
gdf['street_status_reclass'] = (
    gdf['street_status'].astype(str)
    .add('_')
    .add(gdf['existing_segment'].astype(str))
)

In [40]:
# save the reclassified layer to the run's output folder
write_gdf(
    gdf=gdf,
    output_file_path=rc.OUTPUT_FILE_PATH,
    output_file_name='missing_segments_reclass.gpkg',
)

In [41]:
# NOTE(review): 0.13 and 0.29 are not derived anywhere in this notebook — record where they come from
1 / (0.13 / 0.29)

2.2307692307692304