# graph production


Import the needed modules and set the OSMnx configuration.

In [1]:
#import needed modules
import config
import geopandas as gpd
import json
import logging as lg
import networkx as nx
import numpy as np
import os
import osmnx as ox
import pandas as pd
import time
# configure OSMnx: HTTP response caching and logging
osmnx_settings = {
    'use_cache': True,                   # cache HTTP responses locally and reuse them
    'cache_folder': "folder",            # where the HTTP response cache is stored
    'log_file': True,                    # write log output to a file in the logs folder
    'log_console': True,                 # also print log output to the console
    'log_filename': 'calculate-cities',  # name of the log file
}
ox.config(**osmnx_settings)

# report the library versions this notebook was run with
print(ox.__version__)
print(nx.__version__)

0.10
2.2


In [2]:
# --- folders -----------------------------------------------------------
input_folder = 'places'                  # input: place shapefiles
output_folder_shapefile = "shapefile"    # output: graph shapefiles
output_folder_graphml = "graphml"        # output: graphml files
output_folder_lists = "node/edges list"  # output: node/edge lists

# --- street network download options -------------------------------------
network_type = 'drive'  # which type of street network to get
simplify = True         # simplify the graph topology
retain_all = True       # keep the entire graph even if it is not connected

Load a country's shapefile and set the name of the output folder for that country.

In [3]:
def load_shapefile(shapefile, input_folder=input_folder):
    """
    Load one country's shapefile of city boundaries and name its output folder.

    Parameters
    ----------
    shapefile : str
        file name of the shapefile inside `input_folder`
    input_folder : str
        directory that contains the place shapefiles

    Returns
    -------
    cities : geopandas.GeoDataFrame
        the rows of the shapefile, projected to EPSG:4326
    folder : str
        '<iso3>_<name>', built from the first unique value of each field
    """
    # build the path portably: the old '{}\{}' template hard-coded the Windows
    # separator and relied on an invalid escape sequence
    cities = gpd.read_file(os.path.join(input_folder, shapefile))

    # create a specific output folder name for each country from the shapefile attributes
    code = cities['iso3'].unique()[0]  # 'iso3' field: abbreviation of the country
    name = cities['name'].unique()[0]  # 'name' field: name of the country
    folder = '{}_{}'.format(code, name)

    # OSM uses epsg:4326, so project the boundaries to it
    cities = cities.to_crs({'init':'epsg:4326'})

    print('{} loaded {} cities'.format(folder, len(cities)))
    return cities, folder


Set the name of the output folder for each city.

In [4]:
def make_queries(cities):
    """
    Build one download query per city.

    Adds a 'city_folder' column to `cities` (in place) and returns a list of
    dicts, one per row, each with:
      - 'city_folder': the folder name with any '/' replaced by '_'
      - 'polygon': the row's geometry
    """
    def _folder_name(row):
        # 'fuacode_si' field: the specific code of each city
        # 'fuaname_en' field: the English name of each city
        label = '{}_{}'.format(row['fuacode_si'], row['fuaname_en'])
        return label.replace(' ', '_')

    cities['city_folder'] = cities.apply(_folder_name, axis=1)

    queries = []
    for _, row in cities.iterrows():
        # '/' would otherwise be read as a path separator in the output path
        safe_name = row['city_folder'].replace('/', '_')
        queries.append({'city_folder': safe_name, 'polygon': row['geometry']})

    return queries

Download the street network graph within the polygon of each city and save it.

In [5]:
def download_save_graph(query, folder):
    """
    Download the street network inside one city's polygon and save it to disk.

    Nothing is downloaded if the city's graphml file already exists. Otherwise
    the graph is saved as node/edge csv lists, a graphml file and a shapefile.

    Parameters
    ----------
    query : dict
        one entry from make_queries(): 'city_folder' (str) and 'polygon' (geometry)
    folder : str
        name of the country-level output folder

    Returns
    -------
    None
    """
    city_folder = query['city_folder']

    # set the paths of the output files
    output_path_shapefile = '{}/{}'.format(output_folder_shapefile, folder)
    # graphml files go under output_folder_graphml; this path used to be built
    # from output_folder_lists by mistake, leaving output_folder_graphml unused
    output_path_graphml = '{}/{}/{}'.format(output_folder_graphml, folder, city_folder)
    output_path_lists = '{}/{}/{}'.format(output_folder_lists, folder, city_folder)

    # load the graph and save it only if it hasn't already been saved in the output path
    if not os.path.exists('{}/{}.graphml'.format(output_path_graphml, city_folder)):

        graph_name = '{}_{}'.format(folder, city_folder)

        # fix trivially invalid geometries (nested shells, ring self-intersections, etc)
        polygon = query['polygon'].buffer(0)

        G = ox.graph_from_polygon(polygon=polygon,
                                  network_type=network_type,
                                  name=graph_name,
                                  simplify=simplify,
                                  retain_all=retain_all)

        save_node_edge_lists(G, output_path_lists)
        ox.save_graphml(G, folder=output_path_graphml, filename='{}.graphml'.format(city_folder))
        ox.save_graph_shapefile(G, folder=output_path_shapefile, filename=city_folder)

Save the nodes and edges of a graph as csv lists.

In [6]:
def save_node_edge_lists(G, lists_folder):
    """
    Save the nodes and edges of a graph as csv files.

    Writes 'node_list.csv' and 'edge_list.csv' into `lists_folder`, creating
    the folder if needed. Both tables are reindexed to a fixed set of columns,
    so any column the graph lacks is still present in the file (filled with NaN).

    Parameters
    ----------
    G : graph
        the street network, as accepted by ox.graph_to_gdfs
    lists_folder : str
        directory to write the two csv files into

    Returns
    -------
    None
    """
    # convert the graph to node and edge tables
    nodes, edges = ox.graph_to_gdfs(G, node_geometry=False, fill_edge_geometry=False)
    edges['length'] = edges['length'].round(3)

    ecols = ['u', 'v', 'key', 'oneway', 'highway', 'name', 'length',
             'lanes', 'width', 'est_width', 'maxspeed', 'access', 'service',
             'bridge', 'tunnel', 'area', 'junction', 'osmid', 'ref']

    # errors='ignore': geometries are not filled in above, so do not fail if
    # the edge table has no 'geometry' column to drop
    edges = edges.drop(columns=['geometry'], errors='ignore').reindex(columns=ecols)
    nodes = nodes.reindex(columns=['osmid', 'x', 'y', 'ref', 'highway'])

    # create the folder without a separate existence check
    os.makedirs(lists_folder, exist_ok=True)
    nodes.to_csv('{}/node_list.csv'.format(lists_folder), index=False, encoding='utf-8')
    edges.to_csv('{}/edge_list.csv'.format(lists_folder), index=False, encoding='utf-8')

How many cities are there?

In [8]:
# files available in the input folder (named so that the builtin `list` is not shadowed)
input_files = os.listdir(input_folder)

# shapefile(s) to process, one per country -- replace the placeholder with real file names.
# The '.shp' extension belongs inside the string: `"...".shp` is an attribute
# lookup on a str and raises AttributeError.
main_shapefile = ["the name of country's shapefile.shp"]

# read every country shapefile and stack them in a single concat call
# (DataFrame.append was removed in pandas 2.0 and is quadratic when used in a loop)
gdf = pd.concat([gpd.read_file('{}/{}'.format(input_folder, place))
                 for place in main_shapefile])

cities = gdf
len(cities)

35

# run it

In [9]:
# download and save the graphs of every city in every country shapefile
all_start_time = time.time()

for shapefile in main_shapefile:

    start_time = time.time()

    # load the shapefile to get the city boundaries in this country
    cities, folder = load_shapefile(shapefile)
    queries = make_queries(cities)
    count = 0  # number of cities processed without an error

    for query in queries:
        try:
            download_save_graph(query, folder)
            count += 1
        except Exception as e:
            # best effort: log the failure and carry on with the next city
            # (`lg` is the stdlib logging module, imported at the top of the notebook)
            ox.log('"{}" failed: {}'.format(query['city_folder'], e), level=lg.ERROR)

    # report with `folder`, the name returned by load_shapefile
    # (`state_folder` was never defined and raised NameError)
    print('Finished making {} {} graphs in {:,.1f} seconds\n'.format(count, folder, time.time()-start_time))

print('All finished in {:,.1f} seconds'.format(time.time()-all_start_time))

NLD_Netherlands loaded 35 cities
Finished making 35 NLD_Netherlands graphs in 9,656.1 seconds

All finished in 9,656.3 seconds
