In [10]:
import time
import copy
import os
import sys

import numpy as np
import pandas as pd
import geopandas as gpd
import multiprocess

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/%autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
pd.options.display.max_columns = None  # display all columns

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Set this path every time you move the project root folder.
# NOTE(review): hard-coded absolute local path; adjust it for your machine.
os.environ['project_root'] = '/Users/geekduck/Dropbox/gt_survey'

# Make the project importable. The membership check keeps repeated runs of
# this cell from appending the same entry to sys.path over and over.
if os.environ['project_root'] not in sys.path:
    sys.path.append(os.environ['project_root'])

In [12]:
from carpoolsim.basic_settings import *

from carpoolsim.prepare_input import (
    get_shortest_paths,
    TrafficNetwork
)

In [13]:
# Read the TAZ, node and link shapefiles from <data_inputs>/cleaned.
# NOTE(review): 'data_inputs' is not set in the visible cells; presumably the
# carpoolsim.basic_settings import defines it. TODO confirm.
cleaned_dir = os.path.join(os.environ['data_inputs'], "cleaned")

tazs = gpd.read_file(os.path.join(cleaned_dir, "tazs.shp"))
df_nodes = gpd.read_file(os.path.join(cleaned_dir, "nodes.shp"))
df_links = gpd.read_file(os.path.join(cleaned_dir, "links.shp"))

In [14]:
# Check the column dtypes as read from the shapefile ("nid" is cast to str in a later cell).
df_nodes.dtypes

nid            int64
lon          float64
lat          float64
x            float64
y            float64
geometry    geometry
dtype: object

In [15]:
# Cast the id-like columns of both frames to str.
for id_col in ("a", "b", "a_b"):
    df_links[id_col] = df_links[id_col].astype(str)

df_nodes["nid"] = df_nodes["nid"].astype(str)

In [19]:
# Show two random link rows as a quick look at the data (no random_state is passed, so the rows differ per run).
df_links.sample(2)

Unnamed: 0,a,b,a_b,name,distance,factype,speed_limi,geometry
39878,25603,73584,25603_73584,Milstead Ave,0.08847,14,0,"LINESTRING (-84.01592 33.67172, -84.01558 33.6..."
29785,15402,80646,15402_80646,,0.16636,7,0,"LINESTRING (-84.49127 33.76253, -84.49269 33.7..."


In [24]:
# init object
# Copies are passed, so the frames handed to TrafficNetwork are separate
# objects from df_links / df_nodes / tazs loaded above.
traffic_network = TrafficNetwork(
    network_links=df_links.copy(),
    network_nodes=df_nodes.copy(),
    tazs=tazs.copy(),
)

In [25]:
# Run the preparation steps on the network object. Later cells read
# traffic_network.gdf_nodes, .gdf_links, .tazs, .network_dict and .tazs_ids.
# NOTE(review): presumably these calls populate those attributes and the call
# order matters; confirm in carpoolsim.prepare_input.
traffic_network.get_taz_id_list()
traffic_network.convert_abm_links()
traffic_network.build_network()

In [26]:
# Save the processed nodes, links and TAZs as shapefiles in
# <data_inputs>/cleaned. These are the same file names that were read at the
# top of the notebook, so the existing files are overwritten.
output_dir = os.path.join(os.environ['data_inputs'], "cleaned")
for attr_name, file_name in (
    ("gdf_nodes", "nodes.shp"),
    ("gdf_links", "links.shp"),
    ("tazs", "tazs.shp"),
):
    # Look the frame up right before writing it, in nodes -> links -> tazs order.
    getattr(traffic_network, attr_name).to_file(
        os.path.join(output_dir, file_name),
        crs="EPSG:4326",
    )

## Prepare dataset for shortest paths between any pair of TAZ origin/destination.

In [19]:
# Inputs shared by every worker task.
network_dict = traffic_network.network_dict
destination_lst = traffic_network.tazs_ids
# prepare_taz_lists is called with chunk_size=100; each element of its result
# becomes the third argument of one task.
taz_lst = traffic_network.prepare_taz_lists(chunk_size=100)

# One argument tuple per element of taz_lst, unpacked later by pool.starmap.
task_inputs = [
    (network_dict, destination_lst, taz_chunk) for taz_chunk in taz_lst
]

In [20]:
# Time the whole parallel run.
t0 = time.perf_counter()
results = None
# Each tuple in task_inputs is unpacked into one get_shortest_paths call;
# starmap blocks until every call has returned.
# NOTE(review): NUM_PROCESSES is not defined in the visible cells; presumably
# it comes from the carpoolsim.basic_settings star import. Confirm.
with multiprocess.Pool(NUM_PROCESSES) as pool:
    results = pool.starmap(get_shortest_paths, task_inputs)

d1 = time.perf_counter() - t0  # elapsed seconds
print(f'It takes {d1/60:.1f} minutes to finish the run')

Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 1

## Store results in a database

In [23]:
# import the helper that stores the results in the database
from carpoolsim.database.prepare_database import batch_store_from_lst

In [24]:
# Store the shortest-path results in the database at DB_URL.
# Size note: with 1_000 TAZs there are 1_000 * 1_000 = 1_000_000
# origin/destination trip records between them.
# NOTE(review): DB_URL is not defined in the visible cells; presumably it comes
# from the carpoolsim.basic_settings star import. Confirm.
batch_store_from_lst(results, DB_URL)

sqlite engine created
engine successfully connectedd to the database
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data t

## Try to access the database

The cells below demonstrate some tools for accessing the database and querying the stored results.

In [26]:
from sqlalchemy import create_engine

from carpoolsim.database.query_database import (
    query_od_info,
    execute_sql_command
)

In [27]:
# SQLAlchemy engine for DB_URL; passed to the query helpers in the following cells.
engine = create_engine(DB_URL)

In [28]:
# Look up the stored record from TAZ "1404" to TAZ "2820".
# The answer gets its own name: `results` already holds the shortest-path
# output passed to batch_store_from_lst, and rebinding it here would make a
# re-run of that storage cell receive this query tuple instead.
od_info = query_od_info(engine, '1404', '2820')
print(od_info)

('1404', '2820', 10.90411714285714, ['1404', '25495', '1403', '10415', '1402', '65883', '14841', '10304', '81987', '10457', '10459', '14869', '10458', '10467', '10464', '20940', '10469', '10472', '10473', '10479', '83091', '83093', '10474', '13593', '11959', '11960', '83099', '83101', '11961', '14599', '81454', '81455', '14597', '14596', '81451', '36631', '2820'])


In [29]:
# Peek at the first five rows of the `dists` table with a raw SQL query.
sql_command = "SELECT * FROM dists LIMIT 5;"

# NOTE(review): mode="fetchall" presumably returns all result rows at once;
# confirm in carpoolsim.database.query_database.
tot_lines = execute_sql_command(engine, sql_command, mode="fetchall")
display(tot_lines)

[('1', '1', 0.0, "['1']"),
 ('1', '2', 1.6642285714285716, "['1', '80483', '2']"),
 ('1', '3', 1.9914342857142857, "['1', '65666', '3']"),
 ('1', '4', 4.348765714285714, "['1', '65666', '3', '74491', '4']"),
 ('1', '5', 5.66832, "['1', '65666', '3', '74491', '4', '74487', '5']")]