## Prepare path retention

Compute and store the shortest path between every pair of TAZ (traffic analysis zone) centroids.

For example, if you have 1000 TAZs, there will be $1000 \times 1000 = 1\_000\_000$ (one million) OD pairs.

The shortest travel paths between OD pairs are pre-computed and stored in a database, which records:
- The shortest paths between each OD pair
- The shortest travel distance between each OD pair

In [1]:
import time
import copy
import os
import sys

import numpy as np
import pandas as pd
import geopandas as gpd
import multiprocess

%load_ext autoreload
%autoreload 2
pd.set_option("display.max_columns", None)  # never truncate columns when a frame is displayed

In [2]:
from carpoolsim.basic_settings import *

from carpoolsim.prepare_input import (
    get_shortest_paths,
    TrafficNetwork
)

In [3]:
# Set this environment variable every time you move the project root folder.
# NOTE(review): this is a machine-specific absolute path — adjust it for your setup.
os.environ['project_root'] = '/Users/geekduck/Dropbox/gt_survey'

In [4]:
# Read the TAZ, node and link shapefiles from the "cleaned" input folder.
cleaned_dir = os.path.join(os.environ['data_inputs'], "cleaned")

tazs = gpd.read_file(os.path.join(cleaned_dir, "tazs.shp"))
df_nodes = gpd.read_file(os.path.join(cleaned_dir, "nodes.shp"))
df_links = gpd.read_file(os.path.join(cleaned_dir, "links.shp"))

In [5]:
# check the column dtypes of the node table
df_nodes.dtypes

nid           object
lon          float64
lat          float64
x            float64
y            float64
geometry    geometry
dtype: object

In [6]:
# Cast the link id columns and the node id column to strings
# (presumably so ids compare consistently across the two tables).
for id_col in ("a", "b", "a_b"):
    df_links[id_col] = df_links[id_col].astype(str)

df_nodes["nid"] = df_nodes["nid"].astype(str)

In [7]:
# preview two randomly chosen link records
df_links.sample(2)

Unnamed: 0,a,b,a_b,name,speed_limi,distance,factype,ax,ay,a_lat,a_lon,bx,by,b_lat,b_lon,minx_sq,miny_sq,maxx_sq,maxy_sq,time,geometry
35537,20432,534,20432_534,,35.0,0.21349,0,2240501.0,1380088.0,33.79382,-84.35127,2239453.0,1380504.0,33.794957,-84.354722,-0.0,0.0,-0.0,0.0,0.0061,"LINESTRING (-84.35127 33.79382, -84.35472 33.7..."
67665,80593,74138,80593_74138,Powers Ferry Rd,35.0,0.04427,14,2226319.0,1409645.0,33.87497,-84.39817,2226396.0,1409425.0,33.874367,-84.397916,-0.0,0.0,-0.0,0.0,0.001265,"LINESTRING (-84.39817 33.87497, -84.39792 33.8..."


In [8]:
# Build the TrafficNetwork from copies of the loaded frames, so that work done
# on the network object does not modify tazs, df_nodes or df_links themselves.
traffic_network = TrafficNetwork(
    tazs=tazs.copy(),
    network_nodes=df_nodes.copy(),
    network_links=df_links.copy(),
)

In [9]:
# Run the network preparation steps, in this order, on the TrafficNetwork object.
# NOTE(review): judging by the method names, these collect the TAZ ids, convert
# the ABM link table and build the network — confirm against carpoolsim.prepare_input.
# Later cells read traffic_network.tazs_ids, .network_dict, .gdf_nodes and .gdf_links.
traffic_network.get_taz_id_list()
traffic_network.convert_abm_links()
traffic_network.build_network()

In [10]:
# Save the processed nodes, links and TAZs as shapefiles.
# NOTE: these are the same "cleaned" paths the inputs were read from earlier in
# this notebook, so running this cell overwrites the input files.
cleaned_dir = os.path.join(os.environ['data_inputs'], "cleaned")

for attr_name, file_name in (
    ("gdf_nodes", "nodes.shp"),
    ("gdf_links", "links.shp"),
    ("tazs", "tazs.shp"),
):
    getattr(traffic_network, attr_name).to_file(os.path.join(cleaned_dir, file_name))

## Prepare dataset for shortest paths between any pair of TAZ origin/destination.

In [11]:
# Each task tuple carries the whole network and the whole destination list,
# plus one entry of taz_lst (presumably a chunk of up to 100 TAZs — confirm
# against TrafficNetwork.prepare_taz_lists).
network_dict = traffic_network.network_dict
destination_lst = traffic_network.tazs_ids
taz_lst = traffic_network.prepare_taz_lists(chunk_size=100)

task_inputs = [(network_dict, destination_lst, taz_chunk) for taz_chunk in taz_lst]

In [12]:
# number of worker processes used for the pool below
# (presumably defined in carpoolsim.basic_settings and brought in by the star import)
NUM_PROCESSES

12

In [13]:
# Run get_shortest_paths once per entry of task_inputs on a pool of
# NUM_PROCESSES workers, and report the wall-clock time of the whole run.
started_at = time.perf_counter()
results = None  # remains None if the pool raises before starmap returns
with multiprocess.Pool(NUM_PROCESSES) as worker_pool:
    results = worker_pool.starmap(get_shortest_paths, task_inputs)

elapsed_seconds = time.perf_counter() - started_at
print(f'It takes {elapsed_seconds/60:.1f} minutes to finish the run')

Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 100 tazs
Finished searching 1

## Store results in a database
Store the results in the SQLite database. The results are written to the file "data_inputs/path_retention1.db".

In [16]:
# import the helper used below to write the shortest-path results into the database
from carpoolsim.database.prepare_database import batch_store_from_lst

In [18]:
# number of result chunks returned by the pool (starmap yields one result per task)
len(results)

59

In [19]:
# Write the shortest-path results into the database at DB_URL.
# If there are 1_000 TAZs, the dataset contains
# 1000 * 1000 = 1_000_000 trip records between them
batch_store_from_lst(results, DB_URL)

sqlite engine created
engine successfully connected to the database
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to the server!
start feeding data to database! Dataframe shape:  (587300, 2)
Appended a whole batch data to

## Try to access the database

Below are some APIs for accessing the path retention database and querying its results.

In [3]:
from sqlalchemy import create_engine

from carpoolsim.database.query_database import (
    query_od_info,
    execute_sql_command
)

from carpoolsim.basic_settings import *

In [4]:
# SQLAlchemy engine for the database at DB_URL, used by the query helpers below
engine = create_engine(DB_URL)

In [5]:
# Look up the stored record from TAZ "1404" to TAZ "2820".
# A dedicated name is used so that `results` (the list of shortest-path chunks
# fed to batch_store_from_lst) is not overwritten by a single record.
od_record = query_od_info(engine, '1404', '2820')
print(od_record)

('1404', '2820', 10.90411714285714, ['1404', '25495', '1403', '10415', '1402', '65883', '14841', '10304', '81987', '10457', '10459', '14869', '10458', '10467', '10464', '20940', '10469', '10472', '10473', '10479', '83091', '83093', '10474', '13593', '11959', '11960', '83099', '83101', '11961', '14599', '81454', '81455', '14597', '14596', '81451', '36631', '2820'])


Example records, each laid out as `(origin TAZ, destination TAZ, distance, path as a list of node ids)`:
- `('1', '1', 0.0, "['1']")`
  - from TAZ 1 to TAZ 1, the distance is zero and the only node on the path is "1"
- `('1', '2', 1.6642285714285716, "['1', '80483', '2']")`
  - from TAZ 1 to TAZ 2, the distance is 1.664 and the nodes to travel along are "1", "80483", "2"

In [6]:
# Fetch and show the first five rows of the `dists` table.
first_rows = execute_sql_command(
    engine,
    "SELECT * FROM dists LIMIT 5;",
    mode="fetchall",
)
display(first_rows)

[('1', '1', 0.0, "['1']"),
 ('1', '2', 1.6642285714285716, "['1', '80483', '2']"),
 ('1', '3', 1.9914342857142857, "['1', '65666', '3']"),
 ('1', '4', 4.348765714285714, "['1', '65666', '3', '74491', '4']"),
 ('1', '5', 5.66832, "['1', '65666', '3', '74491', '4', '74487', '5']")]