In [None]:
%matplotlib inline

import osmnx as ox
import matplotlib.pyplot as plt
import numpy as np
import geopandas  as gpd
import pandas as pd
# Configure OSMnx through the settings module: cache HTTP responses on disk and
# keep log output off the console. Assigning the attributes directly works on
# both old and new OSMnx releases, whereas `ox.config()` is no longer available
# in OSMnx 2.x.
ox.settings.use_cache = True
ox.settings.log_console = False
%matplotlib inline
import h3 as h3

import warnings
# Ignore all warnings for the rest of the session.
# Deliberately NOT wrapped in `warnings.catch_warnings()`: that context manager
# restores the previous filters when its block exits, which would undo the
# setting immediately and suppress nothing.
warnings.simplefilter('ignore')

import os
from os.path import isfile, join
from pathlib import Path
from os import listdir
import os
# Third argument handed to the H3 indexing call further down.
h3_level = 10

# `path` is the notebook's working directory; `root` is its parent, which the
# output cells use as the base for every file they write.
path = Path(os.getcwd())
root = path.parent.absolute()

# Cell output: show the resolved root so the reader can verify it.
root

In [None]:
# Fetch the street network for Los Angeles County (network_type='drive').
G = ox.graph_from_place('Los Angeles County, CA, USA', network_type='drive')
# Disabled alternative query for Long Beach only.
#G = ox.graph_from_place('Long Beach, CA, USA', network_type='drive')
# Project the graph, consolidate its intersections, then keep just the node table.
Gc = ox.consolidate_intersections(ox.project_graph(G))
nodes = ox.graph_to_gdfs(Gc, edges=False)
# Cell output: preview the coordinate columns.
# NOTE(review): check this preview for NaN lat/lon — the H3 step below reads those columns.
nodes[['x', 'y', 'lat', 'lon']]

In [None]:
# Show the first rows of the node table.
display(nodes.head())

In [None]:
# Distinct values present in the nodes' 'highway' column.
nodes.highway.unique()

In [None]:
# (rows, columns) of the node table.
nodes.shape

In [None]:
# Save the node table to la_county_nodes.shp inside its own folder under root.
# The destination folder is expected to exist already.
nodes_out_dir = root / 'X.data' / 'nodes_and_edges' / 'la_county_nodes'
nodes.to_file(nodes_out_dir / 'la_county_nodes.shp')

In [None]:
def lat_lng_to_h3(row):
    """Return the H3 cell id containing a node's (lat, lon) position.

    Parameters
    ----------
    row : object exposing ``lat`` and ``lon`` attributes (e.g. one row of the
        node table when applied with ``axis=1``).

    Returns
    -------
    The cell identifier produced by the h3 library at resolution ``h3_level``.

    Notes
    -----
    h3-py 4.x renamed ``geo_to_h3`` to ``latlng_to_cell`` (same argument
    order), so the newer name is used when available and the older one
    otherwise.
    """
    to_cell = getattr(h3, 'latlng_to_cell', None) or h3.geo_to_h3
    return to_cell(row.lat, row.lon, h3_level)


# Tag every node with the hexagon it falls into.
# NOTE(review): nodes created by merging several intersections may not carry
# lat/lon values — confirm these columns contain no NaN before trusting hex_id.
nodes['hex_id'] = nodes.apply(lat_lng_to_h3, axis=1)

In [None]:
# Spot-check two random rows, which now include the hex_id column.
nodes.sample(2)

In [None]:
# Frequency of each 'highway' value among the nodes.
nodes.highway.value_counts()

In [None]:
# Number of nodes for every (hex_id, highway) combination, as a flat table
# with columns hex_id, highway, count.
highway_groups = nodes.groupby(['hex_id', 'highway'])
highway_cnts = (
    highway_groups['hex_id']
    .agg('count')
    .to_frame('count')
    .reset_index()
)
# Cell output: three random rows as a sanity check.
highway_cnts.sample(3)

In [None]:
# Number of nodes for every (hex_id, street_count) combination, as a flat
# table with columns hex_id, street_count, count.
street_count_groups = nodes.groupby(['hex_id', 'street_count'])
street_count_cnts = (
    street_count_groups['hex_id']
    .agg('count')
    .to_frame('count')
    .reset_index()
)
# Cell output: three random rows as a sanity check.
street_count_cnts.sample(3)

In [None]:
# Write the per-hexagon highway counts to CSV, without the row index.
highway_cnts_csv = root / 'X.data' / 'nodes_and_edges' / 'nodes_highway_cnts.csv'
highway_cnts.to_csv(highway_cnts_csv, index=False)

In [None]:
# Write the per-hexagon street_count counts to CSV, without the row index.
street_count_cnts_csv = root / 'X.data' / 'nodes_and_edges' / 'nodes_street_count_cnts.csv'
street_count_cnts.to_csv(street_count_cnts_csv, index=False)

In [None]:
# Drop the node table; the remaining cells shown here rebuild their own node
# frame (df_nodes) from the graph instead of reusing it.
del nodes

In [None]:
# Disabled re-download options; the graph G fetched earlier is reused instead.
#G = ox.graph_from_place('Long Beach, CA, USA', network_type='drive')
#G = ox.graph_from_place('Los Angeles County, CA, USA', network_type='drive')
# Add speed attributes to the graph's edges (G is rebound to the result).
G = ox.add_edge_speeds(G)

In [None]:
import warnings
# Suppress every warning from this point on (the filter stays installed).
warnings.filterwarnings('ignore')
# Project and consolidate again, presumably so that Gc is derived from the
# graph that now carries edge speeds.
Gc = ox.consolidate_intersections(ox.project_graph(G))

In [None]:
# Node table of the rebuilt consolidated graph (edges omitted).
df_nodes = ox.graph_to_gdfs(Gc, edges=False)

In [None]:
# Compare the number of consolidated nodes with the number of distinct
# osmid_original values they carry.
print(df_nodes.shape)
distinct_original_ids = df_nodes['osmid_original'].unique()
len(distinct_original_ids)

In [None]:
# Edge table of the consolidated graph (nodes omitted).
df_edges = ox.graph_to_gdfs(Gc, nodes=False)

In [None]:
# Frequency of each 'highway' value among the consolidated nodes.
df_nodes.highway.value_counts()

In [None]:
# Frequency of each 'bridge' value among the edges.
df_edges.bridge.value_counts()

In [None]:
# (rows, columns) of the edge table.
df_edges.shape

In [None]:
# Number of distinct 'osmid' values among the edges.
len(df_edges['osmid'].unique())

In [None]:
import re

def get_max(var):
    """Return the largest whole number found in a tag value.

    A scalar is converted to text and its first whitespace-separated token
    consisting only of digits is used (``'25 mph'`` -> ``25``). For a list,
    the same extraction is applied to every element and the maximum is
    returned.

    Parameters
    ----------
    var : str, int, float, list or None
        Raw value of one cell, e.g. ``'25 mph'`` or ``['25 mph', '35 mph']``.

    Returns
    -------
    int or float
        The extracted integer. NaN is returned when ``var`` is missing
        (``None`` or any NaN float, not only the ``np.nan`` singleton) or
        when no digit-only token can be found (e.g. ``'signals'``, ``'1.5'``,
        an empty list) instead of raising ``IndexError``.
    """

    def _first_int(value):
        # First digit-only token of the textual form, or None if there is none.
        for token in str(value).split():
            if token.isdigit():
                return int(token)
        return None

    # Missing value: hand NaN back unchanged so the column keeps its gaps.
    if var is None:
        return np.nan
    if isinstance(var, float) and np.isnan(var):
        return var

    candidates = var if isinstance(var, list) else [var]
    found = [number for number in map(_first_int, candidates) if number is not None]
    # Elements that carry no number are ignored rather than aborting the run.
    return max(found) if found else np.nan

def get_max_float(var):
    """Return the largest decimal number found in a tag value.

    The first match of the pattern ``\\d*\\.?\\d+`` in the textual form of each
    candidate is converted to ``float``; for a list the maximum over all
    elements is returned.

    Parameters
    ----------
    var : str, int, float, list or None
        Raw value of one cell, e.g. ``'3.5 m'`` or ``['2', '4.25 m']``.

    Returns
    -------
    float
        The extracted number. NaN is returned when ``var`` is missing
        (``None`` or any NaN float) or when no number can be found, instead
        of raising ``IndexError``. Scalars are stringified before matching,
        so plain numbers are accepted just like they are inside lists.
    """
    # Missing value: hand NaN back unchanged so the column keeps its gaps.
    if var is None:
        return np.nan
    if isinstance(var, float) and np.isnan(var):
        return var

    candidates = var if isinstance(var, list) else [var]
    found = []
    for candidate in candidates:
        matches = re.findall(r'\d*\.?\d+', str(candidate))
        if matches:
            found.append(float(matches[0]))
    # Candidates without any number are skipped rather than aborting the run.
    return max(found) if found else np.nan


def get_first(var):
    """Return a single textual value for a cell that may hold a list.

    Parameters
    ----------
    var : any
        Raw value of one cell: a scalar, a list of values, or a missing value.

    Returns
    -------
    str or float
        ``str(var[0])`` for a non-empty list and ``str(var)`` for any other
        present value. NaN is returned for missing input (``None`` or any NaN
        float, not only the ``np.nan`` singleton, so missing values are never
        turned into the text ``'nan'``) and for an empty list.
    """
    if var is None:
        return np.nan
    if isinstance(var, float) and np.isnan(var):
        return var

    if isinstance(var, list):
        # An empty list has no first element; treat it as missing.
        return str(var[0]) if var else np.nan
    return str(var)


In [None]:
# Spot-check three random edges before any column clean-up.
df_edges.sample(3)

In [None]:
# Columns carried forward into the cleaned edge table.
cols_to_keep = ['osmid', 'speed_kph', 'ref', 'name', 'highway','oneway','length','lanes','maxspeed','bridge','access','junction','tunnel', 'geometry']

# Replace the existing index with a plain 0..n-1 index (returns a new frame
# rather than mutating the one produced by the earlier cell).
df_edges = df_edges.reset_index(drop=True)

# A tag column may be absent when no edge in the queried area carries that
# tag (likely with a smaller place query); add it as all-missing so the
# selection below does not fail with KeyError.
for col in cols_to_keep:
    if col not in df_edges.columns:
        df_edges[col] = np.nan

# .copy() makes the subset an independent frame, so the column assignments in
# the following cells do not trigger SettingWithCopyWarning.
df_edges = df_edges[cols_to_keep].copy()

display(df_edges.sample(2))

In [None]:
# Column dtypes of the reduced edge table.
df_edges.dtypes

In [None]:
# Reduce each cell to a single value: the largest whole number for maxspeed
# and lanes, the first entry (as text) for bridge.
# Series.apply visits only the column being cleaned; DataFrame.apply(axis=1)
# would build a full row Series for every edge just to read one field.
df_edges['maxspeed'] = df_edges['maxspeed'].apply(get_max)
df_edges['lanes'] = df_edges['lanes'].apply(get_max)
df_edges['bridge'] = df_edges['bridge'].apply(get_first)

In [None]:
# Keep one value per edge for speed_kph. get_first returns text for every
# non-missing value, so this column is stored as strings afterwards.
# Applied column-wise: no need to materialise each row as a Series.
df_edges['speed_kph'] = df_edges['speed_kph'].apply(get_first)

In [None]:
# Keep only the first name when an edge lists several.
# Applied column-wise: no need to materialise each row as a Series.
df_edges['name'] = df_edges['name'].apply(get_first)

In [None]:
# Keep only the first osmid (as text) when an edge lists several.
# Applied column-wise: no need to materialise each row as a Series.
df_edges['osmid'] = df_edges['osmid'].apply(get_first)

In [None]:
# Keep only the first ref when an edge lists several.
# Applied column-wise: no need to materialise each row as a Series.
df_edges['ref'] = df_edges['ref'].apply(get_first)

In [None]:
# Keep only the first highway value when an edge lists several.
# Applied column-wise: no need to materialise each row as a Series.
df_edges['highway'] = df_edges['highway'].apply(get_first)

In [None]:
# Spot-check three random edges after the column clean-up.
df_edges.sample(3)

In [None]:
# Save the cleaned edge table to la_county_edges.shp, without the row index.
# NOTE(review): this path goes through 'semi_processed', whereas the node
# outputs are written directly under 'X.data/nodes_and_edges' — confirm which
# layout is intended.
edges_out_dir = root / 'X.data' / 'semi_processed' / 'nodes_and_edges' / 'la_county_edges'
df_edges.to_file(edges_out_dir / 'la_county_edges.shp', index=False)