# Construct Street Network: EC2 Instance Notebook
OSMnx is a Python package to retrieve, model, analyze, and visualize street networks from OpenStreetMap. This notebook uses the OSMnx library to construct a network of nodes and edges for Los Angeles County. It is meant to run on an EC2 instance because the computation is expensive. A few Linux commands are used to transfer data from the EC2 instance to the S3 bucket.

In [None]:
%matplotlib inline

import osmnx as ox
import matplotlib.pyplot as plt
import numpy as np
import geopandas  as gpd
import pandas as pd
ox.config(use_cache=True, log_console=False)
%matplotlib inline
import h3 as h3

# import aws libraries
import boto3
# set name of S3 bucket
s3_bucket = 'traffic-data-bucket'

import warnings
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

import os
from os.path import isfile, join
from pathlib import Path
from os import listdir
import os
os.getcwd()

path =  Path(os.getcwd())
root = path.parent.absolute()

h3_level = 8
root

## 1. Generate network nodes
We start by downloading the driving network for Los Angeles County and constructing the street network graph from it.

### 1.1 Construct graph of Los Angeles County

In [None]:
# Download the drivable street network of Los Angeles County
G = ox.graph_from_place('Los Angeles County, CA, USA', network_type='drive')

Next, we consolidate intersections and rebuild the graph topology, which reconnects edge geometries to the new consolidated nodes.

In [None]:
# Project the graph, then consolidate its intersections.
# NOTE(review): the result is passed to ox.graph_to_gdfs in the next cell, so
# it is presumably a rebuilt graph rather than a GeoSeries -- confirm against
# the installed OSMnx version.
Gc = ox.consolidate_intersections(ox.project_graph(G))

Next, convert the MultiDiGraph to a GeoPandas dataframe.

In [None]:
# Extract only the node table (edges=False), then preview its coordinate columns.
nodes = ox.graph_to_gdfs(Gc, edges=False)
nodes[['x', 'y', 'lat', 'lon']]

Let's view some of the nodes and the unique highways.

In [None]:
# Show the first rows of the node table.
display(nodes.head())

In [None]:
# (rows, columns) of the node table.
nodes.shape

### 1.2 Create shapefile from network of nodes

In [None]:
# Create the output directory first so the write does not fail on a fresh
# instance where the folder does not exist yet.
nodes_dir = root / 'X.data' / 'nodes_and_edges' / 'la_county_nodes'
nodes_dir.mkdir(parents=True, exist_ok=True)
nodes.to_file(nodes_dir / 'la_county_nodes.shp')

### 1.3 Upload all node shapefiles from EC2 to S3 bucket
The AWS CLI must be installed on the Ubuntu instance for this to work. Install it by running `sudo apt-get install awscli` in the terminal.

In [None]:
!aws s3 sync /home/ubuntu/X.data/nodes_and_edges/la_county_nodes s3://traffic-data-bucket/nodes_and_edges/la_county_nodes

### 1.4 Apply the hexagon id based on the latitude and longitude for each point

In [None]:
def lat_lng_to_h3(row):
    """Return the H3 cell id for a row's ``lat``/``lon`` at resolution ``h3_level``."""
    latitude, longitude = row.lat, row.lon
    return h3.geo_to_h3(latitude, longitude, h3_level)


# Compute one hexagon id per node row and store it in a new column.
hex_ids = nodes.apply(lat_lng_to_h3, axis=1)
nodes['hex_id'] = hex_ids

In [None]:
# Spot-check two random rows of the node table.
nodes.sample(2)

In [None]:
# Frequency of each value in the nodes' `highway` column.
nodes.highway.value_counts()

### 1.5 Get counts of the number of highways and streets within each hex id

In [None]:
# Count rows of `nodes` for every (hex_id, highway) combination and flatten the
# result into a table with a `count` column.
highway_groups = nodes.groupby(['hex_id', 'highway'])['hex_id']
highway_cnts = highway_groups.count().rename('count').reset_index()
highway_cnts.sample(3)

In [None]:
# Count rows of `nodes` for every (hex_id, street_count) combination and
# flatten the result into a table with a `count` column.
street_count_groups = nodes.groupby(['hex_id', 'street_count'])['hex_id']
street_count_cnts = street_count_groups.count().rename('count').reset_index()
street_count_cnts.sample(3)

### 1.6 Write highway and street counts data to CSV and upload to S3 bucket

In [None]:
# Write the highway counts as CSV straight to the S3 bucket.
highway_cnts_uri = f"s3://{s3_bucket}/nodes_and_edges/nodes_highway_cnts.csv"
highway_cnts.to_csv(highway_cnts_uri)

In [None]:
# Write the street-count counts as CSV straight to the S3 bucket.
street_count_cnts_uri = f"s3://{s3_bucket}/nodes_and_edges/nodes_street_count_cnts.csv"
street_count_cnts.to_csv(street_count_cnts_uri)

In [None]:
# Drop the name `nodes`; presumably to release memory before the edge
# dataset is built.
del nodes

## 2. Generate network edges
### 2.1 Construct graph of Los Angeles County and add edge speeds

In [None]:
# Alternative place query, left disabled (presumably a smaller area for quick runs):
#G = ox.graph_from_place('Long Beach, CA, USA', network_type='drive')
# Download the drivable network for Los Angeles County, then add speeds to its edges.
G = ox.graph_from_place('Los Angeles County, CA, USA', network_type='drive')
G = ox.add_edge_speeds(G)

In [None]:
import warnings
# Suppress all warnings from here on.
warnings.filterwarnings('ignore')
# Project the graph and consolidate its intersections (same steps as in section 1.1).
Gc = ox.consolidate_intersections(ox.project_graph(G))

### 2.2 Convert MultiDiGraph to a GeoDataframe of nodes

In [None]:
# Node table only (edges=False).
df_nodes = ox.graph_to_gdfs(Gc, edges=False)

In [None]:
# Compare the node table's shape with the number of distinct
# `osmid_original` values.
print(df_nodes.shape)
len(df_nodes.osmid_original.unique())

### 2.3 Convert MultiDiGraph to a GeoDataframe of edges

In [None]:
# Edge table only (nodes=False).
df_edges = ox.graph_to_gdfs(Gc, nodes=False)

### 2.4 Get counts for highways and bridges

In [None]:
# Frequency of each value in the nodes' `highway` column.
df_nodes.highway.value_counts()

In [None]:
# Frequency of each value in the edges' `bridge` column.
df_edges.bridge.value_counts()

In [None]:
# (rows, columns) of the edge table.
df_edges.shape

## 3. Feature engineering
### 3.1 Create functions to get maximum and first values for features

In [None]:
import re

def _first_int(value):
    """Return the first whitespace-separated all-digit token of *value* as int, or None."""
    for word in str(value).split():
        if word.isdigit():
            return int(word)
    return None


def get_max(var):
    """Reduce a tag value to a single integer.

    Parameters
    ----------
    var : str, int, list or float NaN
        A single value (e.g. ``'25 mph'``), a list of such values, or NaN
        when the value is missing.

    Returns
    -------
    int or float
        * NaN input is returned unchanged.
        * For a list: the largest of the first integers found in each
          element; elements without an integer token are skipped.  An empty
          list gives ``0``.
        * For a scalar: the first integer token.
        * NaN when no integer token can be found at all (instead of raising
          ``IndexError``).
    """
    # Test the value rather than the identity: a NaN read from a float column
    # is not the same object as np.nan, so `var is np.nan` would miss it.
    if isinstance(var, float) and np.isnan(var):
        return var

    if isinstance(var, list):
        found = [n for n in map(_first_int, var) if n is not None]
        if found:
            return max(found)
        # Nothing parsed: keep 0 for an empty list, NaN for unparseable items.
        return 0 if not var else np.nan

    number = _first_int(var)
    return np.nan if number is None else number

def _first_float(value):
    """Return the first decimal number found in ``str(value)`` as float, or None."""
    matches = re.findall(r'\d*\.?\d+', str(value))
    return float(matches[0]) if matches else None


def get_max_float(var):
    """Reduce a tag value to a single float.

    Parameters
    ----------
    var : str, number, list or float NaN
        A single value (e.g. ``'3.5 m'``), a list of such values, or NaN
        when the value is missing.

    Returns
    -------
    float
        * NaN input is returned unchanged.
        * For a list: the largest of the first numbers found in each
          element; elements without a number are skipped.  An empty list
          gives ``0.0``.
        * For a scalar: the first number found.  The value is converted with
          ``str()`` first, as the list branch does, so numeric scalars no
          longer raise ``TypeError``.
        * NaN when no number can be found at all (instead of raising
          ``IndexError``).
    """
    # Test the value rather than the identity: a NaN read from a float column
    # is not the same object as np.nan, so `var is np.nan` would miss it.
    if isinstance(var, float) and np.isnan(var):
        return var

    if isinstance(var, list):
        found = [x for x in map(_first_float, var) if x is not None]
        if found:
            return max(found)
        # Nothing parsed: keep 0.0 for an empty list, NaN for unparseable items.
        return float(0) if not var else np.nan

    number = _first_float(var)
    return np.nan if number is None else number


def get_first(var):
    """Return a value, or the first element of a list, as a string.

    Parameters
    ----------
    var : object
        A scalar, a list of values, or NaN when the value is missing.

    Returns
    -------
    str or float
        ``str(var[0])`` for a non-empty list, ``str(var)`` for any other
        non-missing value.  NaN input is returned unchanged, and an empty
        list gives NaN (instead of raising ``IndexError``).
    """
    # Test the value rather than the identity: a NaN read from a float column
    # is not the same object as np.nan and would otherwise become the
    # string 'nan'.
    if isinstance(var, float) and np.isnan(var):
        return var

    if isinstance(var, list):
        return str(var[0]) if var else np.nan

    return str(var)


In [None]:
# Spot-check three random rows of the edge table.
df_edges.sample(3)

In [None]:
# Columns carried forward into the edge dataset.
cols_to_keep = [
    'osmid', 'speed_kph', 'ref', 'name', 'highway', 'oneway', 'length',
    'lanes', 'maxspeed', 'bridge', 'access', 'junction', 'tunnel', 'geometry',
]

# Discard the existing index and keep only the selected columns.  The explicit
# copy makes df_edges an independent frame, so later column assignments do not
# trigger SettingWithCopyWarning.
df_edges = df_edges.reset_index(drop=True)[cols_to_keep].copy()

display(df_edges.sample(2))

In [None]:
# Column dtypes of the reduced edge table.
df_edges.dtypes

### 3.2 Add traffic and road features to edges dataset
Generate new features for each of the edges, such as maximum speed, number of lanes, highway type, etc.

In [None]:
# Reduce each `maxspeed` value to one number via get_max.  Applying on the
# column avoids building a full row object per edge just to read one field.
df_edges['maxspeed'] = df_edges['maxspeed'].apply(get_max)

In [None]:
#df_edges['width'] = df_edges.apply(lambda x: get_max(x.width), axis=1)

In [None]:
# Reduce each `lanes` value to one number via get_max.  Applying on the
# column avoids building a full row object per edge just to read one field.
df_edges['lanes'] = df_edges['lanes'].apply(get_max)

In [None]:
# Keep one `bridge` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['bridge'] = df_edges['bridge'].apply(get_first)

In [None]:
# Keep one `speed_kph` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['speed_kph'] = df_edges['speed_kph'].apply(get_first)

In [None]:
# Keep one `name` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['name'] = df_edges['name'].apply(get_first)

In [None]:
# Keep one `osmid` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['osmid'] = df_edges['osmid'].apply(get_first)

In [None]:
# Keep one `ref` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['ref'] = df_edges['ref'].apply(get_first)

In [None]:
# Keep one `highway` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['highway'] = df_edges['highway'].apply(get_first)

In [None]:
# Keep one `oneway` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['oneway'] = df_edges['oneway'].apply(get_first)

In [None]:
# Keep one `access` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['access'] = df_edges['access'].apply(get_first)

In [None]:
# Keep one `junction` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['junction'] = df_edges['junction'].apply(get_first)

In [None]:
# Keep one `tunnel` value per edge via get_first (column-wise apply avoids
# per-row overhead).
df_edges['tunnel'] = df_edges['tunnel'].apply(get_first)

In [None]:
# Spot-check three random rows after the feature columns were rewritten.
df_edges.sample(3)

### 3.3 Create shapefile from network of edges

In [None]:
# Create the output directory first so the write does not fail on a fresh
# instance where the folder does not exist yet.
edges_dir = root / 'X.data' / 'nodes_and_edges' / 'la_county_edges'
edges_dir.mkdir(parents=True, exist_ok=True)
df_edges.to_file(edges_dir / 'la_county_edges.shp', index=False)

### 3.4 Upload all edge shapefiles from EC2 Instance to S3 bucket

In [None]:
!aws s3 sync /home/ubuntu/X.data/nodes_and_edges/la_county_edges s3://traffic-data-bucket/nodes_and_edges/la_county_edges

In [None]:
# (rows, columns) of the final edge table.
df_edges.shape