# Prepare Paratransit Trips

In [1]:
import geopandas as gpd
import pandas as pd
import os
import datetime as dt
import time
from copy import deepcopy
import numpy as np
import osmnx as ox
import networkx as nx
import scipy
import sys
import pickle
import math
from shapely.geometry import Point
from pyproj import Transformer

# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Step one directory up from the current working directory; the path constants
# defined in the next cell are all built from os.getcwd().
# NOTE(review): this is relative to the *current* directory, so re-running this
# cell without restarting the kernel moves up one more level each time.
os.chdir(os.path.join(os.getcwd(), ".."))
print(os.getcwd())

/home/jovyan/work/code/paratransit-mdp


In [2]:
# Global VARS

# Every input/output location lives under the "data" folder of the working directory.
DATA_DIR = os.path.join(os.getcwd(), "data")
TRAVEL_TIME_MATRIX_DIR = os.path.join(DATA_DIR, "travel_time_matrix")
TRIPS_PATH = os.path.join(DATA_DIR, "CARTA", "base", "para_transit_trips_2021.csv")

# Time buffer; only referenced (as minutes) by the commented-out
# e_datetime / l_datetime code further down.
TIME_BUF = 15

In [3]:
# load travel_time_matrix

# Later cells index this array as [pickup node_id, dropoff node_id] and treat
# the values as seconds.
filepath = os.path.join(TRAVEL_TIME_MATRIX_DIR, 'travel_time_matrix.csv')
# np.loadtxt accepts a path directly and takes care of opening/closing the file.
travel_time_matrix = np.loadtxt(filepath, delimiter=",", dtype=float)

In [3]:
# load nodes

# always_xy=True fixes the axis order to (x, y), i.e. (lon, lat) for EPSG:4326 input.
transformer_4326_5070 = Transformer.from_crs("EPSG:4326", "EPSG:5070", always_xy=True)
def apply_new_crs(p, transformer):
    """Reproject a point with the given transformer.

    Args:
        p: Point-like object exposing ``.x`` and ``.y`` in the transformer's
            source CRS.
        transformer: Object with a ``transform(x, y)`` method returning the
            transformed ``(x, y)`` pair (e.g. a pyproj ``Transformer``).

    Returns:
        A new ``Point`` holding the transformed coordinates.
    """
    # Use the transformer that was passed in; the previous version silently
    # ignored this argument and always used the global transformer_4326_5070.
    x, y = transformer.transform(p.x, p.y)
    return Point(x, y)

# Read the node table stored next to the travel time matrix.
filepath = os.path.join(TRAVEL_TIME_MATRIX_DIR, 'nodes.csv')
dtype = {
    'node_id': int,
    'osmid': int,
    'lat': float,
    'lon': float,
}
nodes = pd.read_csv(filepath, dtype=dtype)

# Point geometry per node: first in lon/lat (EPSG:4326), then reprojected to EPSG:5070.
nodes['geom_4326'] = nodes.apply(lambda node: Point(node['lon'], node['lat']), axis=1)
nodes['geom_5070'] = nodes['geom_4326'].apply(lambda geom: apply_new_crs(geom, transformer_4326_5070))

# Make the projected column the active geometry and tag it with its CRS.
nodes = gpd.GeoDataFrame(nodes).set_geometry('geom_5070').set_crs("EPSG:5070")
nodes.head(3)

Unnamed: 0,node_id,osmid,lat,lon,geom_4326,geom_5070
0,0,66923001,34.984104,-85.145232,POINT (-85.14523199999999 34.9841039),POINT (980610.523 1381086.291)
1,1,66937537,34.986203,-85.15179,POINT (-85.15179019999999 34.9862029),POINT (979993.888 1381251.956)
2,2,66937546,34.985842,-85.151119,POINT (-85.1511191 34.9858419),POINT (980058.850 1381218.750)


In [5]:
# load trips

# Source column name -> name used throughout the rest of the notebook.
columns = {
    'Direct Distance': 'distance',
    'Sch Time in HH:MM:SS': 'time',
    'Date': 'date',
    'Pickup lat': 'pickup_lat',
    'Pickup lon': 'pickup_lon',
    'Dropoff lat': 'dropoff_lat',
    'Dropoff lon': 'dropoff_lon',
    'Passenger Types': 'passenger_types',
    'AM/WC': 'am_wc',
}
# Only the columns that get renamed are read from the file.
usecols = list(columns)
dtype = {
    'Direct Distance': float,
    'Sch Time in HH:MM:SS': str,
    'Date': str,
    'Pickup lat': float,
    'Pickup lon': float,
    'Dropoff lat': float,
    'Dropoff lon': float,
    'Passenger Types': str,
    'AM/WC': str,
}
trips = pd.read_csv(TRIPS_PATH, usecols=usecols, dtype=dtype).rename(columns=columns)
print(len(trips))

# Discard trips missing a schedule time, date or either coordinate pair;
# the two printed counts show how many rows that removes.
trips = trips.dropna(subset=['time', 'date', 'pickup_lat', 'pickup_lon', 'dropoff_lat', 'dropoff_lon'])
print(len(trips))

# Pickup / drop-off points in lon/lat (EPSG:4326) ...
trips['geom_4326_pickup'] = trips.apply(lambda trip: Point(trip['pickup_lon'], trip['pickup_lat']), axis=1)
trips['geom_4326_dropoff'] = trips.apply(lambda trip: Point(trip['dropoff_lon'], trip['dropoff_lat']), axis=1)
# ... and the same points reprojected to EPSG:5070.
trips['geom_5070_pickup'] = trips['geom_4326_pickup'].apply(lambda geom: apply_new_crs(geom, transformer_4326_5070))
trips['geom_5070_dropoff'] = trips['geom_4326_dropoff'].apply(lambda geom: apply_new_crs(geom, transformer_4326_5070))

trips.head(2)

25843
25843


Unnamed: 0,distance,time,date,pickup_lat,pickup_lon,dropoff_lat,dropoff_lon,passenger_types,am_wc,geom_4326_pickup,geom_4326_dropoff,geom_5070_pickup,geom_5070_dropoff
0,9.76,4:15:00,2021-01-01,35.045644,-85.319982,35.022033,-85.241765,WH1,WC,POINT (-85.319982 35.045644),POINT (-85.241765 35.022033),POINT (964118.6371208868 1386139.680921921),POINT (971447.881122148 1384311.182383026)
1,7.99,4:15:00,2021-01-01,35.032584,-85.316599,35.022087,-85.241776,AM1,AM,POINT (-85.316599 35.032584),POINT (-85.241776 35.022087),POINT (964586.6717501065 1384721.948728439),POINT (971446.2095713749 1384317.073218042)


In [5]:
def get_nearest_node(p, nodes, buf=400):
    """Return the full row of the node closest to ``p`` within ``buf``.

    Note: a later cell redefines ``get_nearest_node`` with a tuple return
    value; this version is only used for the single-point check in this cell.

    Args:
        p: Point geometry in the same CRS as ``nodes['geom_5070']``.
        nodes: Node table with a ``geom_5070`` geometry column.
        buf: Search radius passed to ``p.buffer`` (in the units of that CRS).

    Returns:
        The nearest node's row with an extra ``p_dist`` entry (distance from
        ``p``), or ``None`` when no node lies inside the buffer. Previously
        the no-match case raised ``UnboundLocalError`` because ``row`` was
        never assigned.
    """
    geom = p.buffer(buf)
    result = nodes[nodes['geom_5070'].within(geom)].copy()
    if len(result) == 0:
        return None
    result['p_dist'] = result['geom_5070'].apply(lambda x: p.distance(x))
    # Closest candidate first.
    return result.sort_values(by=['p_dist']).iloc[0]

# Manual check of the lookup on one hard-coded lon/lat point (reprojected to
# EPSG:5070 first); the commented-out line is an alternative test point.
#get_nearest_node(apply_new_crs(Point(-85.3, 35.04), transformer_4326_5070), nodes)
get_nearest_node(apply_new_crs(Point(-85.269851, 35.0567932), transformer_4326_5070), nodes)

node_id                                             1190
osmid                                          202601417
lat                                            35.055591
lon                                           -85.268955
geom_4326                 POINT (-85.2689553 35.0555912)
geom_5070    POINT (968580.0168917554 1387764.497722734)
p_dist                                        157.010746
Name: 1190, dtype: object

In [6]:
def get_nearest_node(p, nodes, buf=400):
    """Find the node closest to ``p`` among those inside a ``buf`` buffer.

    Args:
        p: Point geometry in the same CRS as ``nodes['geom_5070']``.
        nodes: Node table with ``node_id``, ``osmid`` and ``geom_5070`` columns.
        buf: Search radius passed to ``p.buffer``.

    Returns:
        ``(node_id, osmid, distance_to_node)`` of the nearest node, or
        ``(-1, -1, -1)`` when no node falls inside the buffer.
    """
    search_area = p.buffer(buf)
    candidates = nodes[nodes['geom_5070'].within(search_area)].copy()
    if len(candidates) == 0:
        # Sentinel triple: nothing within reach of this point.
        return -1, -1, -1
    candidates['p_dist'] = candidates['geom_5070'].apply(lambda node_geom: p.distance(node_geom))
    nearest = candidates.sort_values(by=['p_dist']).iloc[0]
    return nearest['node_id'], nearest['osmid'], nearest['p_dist']

print(f"starting at {time.time()}")

# Nearest network node for every pickup point; each entry of `temp` is a
# (node_id, osmid, distance) triple.
temp = trips['geom_5070_pickup'].apply(lambda p: get_nearest_node(p, nodes))
result = {
    'pickup_node_id': [val[0] for val in temp],
    'pickup_osmid': [val[1] for val in temp],
    'distance_to_pickup': [val[2] for val in temp],
}
print(f"done with pickup at {time.time()}")

# Same lookup for the drop-off points.
temp = trips['geom_5070_dropoff'].apply(lambda p: get_nearest_node(p, nodes))
result.update({
    'dropoff_node_id': [val[0] for val in temp],
    'dropoff_osmid': [val[1] for val in temp],
    'distance_to_dropoff': [val[2] for val in temp],
})

# Attach the six result lists as new columns, in insertion order.
for column_name, column_values in result.items():
    trips[column_name] = column_values
print(f"done with dropoff at {time.time()}")
trips.head(2)

starting at 1626469125.1797998
done with pickup at 1626469842.084514
done with dropoff at 1626470563.0889466


Unnamed: 0,distance,time,date,pickup_lat,pickup_lon,dropoff_lat,dropoff_lon,passenger_types,am_wc,geom_4326_pickup,geom_4326_dropoff,geom_5070_pickup,geom_5070_dropoff,pickup_node_id,pickup_osmid,distance_to_pickup,dropoff_node_id,dropoff_osmid,distance_to_dropoff
0,9.76,4:15:00,2021-01-01,35.045644,-85.319982,35.022033,-85.241765,WH1,WC,POINT (-85.319982 35.045644),POINT (-85.241765 35.022033),POINT (964118.6371208868 1386139.680921921),POINT (971447.881122148 1384311.182383026),5839,202694168,1.755803,595,202579218,23.394279
1,7.99,4:15:00,2021-01-01,35.032584,-85.316599,35.022087,-85.241776,AM1,AM,POINT (-85.316599 35.032584),POINT (-85.241776 35.022087),POINT (964586.6717501065 1384721.948728439),POINT (971446.2095713749 1384317.073218042),236,202546601,102.705864,595,202579218,28.68902


In [7]:
def get_travel_time(pickup_node_id, dropoff_node_id, travel_time_matrix):
    """Look up the travel time between two nodes.

    Args:
        pickup_node_id: Row index into ``travel_time_matrix``; -1 means "no node".
        dropoff_node_id: Column index into ``travel_time_matrix``; -1 means "no node".
        travel_time_matrix: 2-D array indexed as ``[pickup, dropoff]``.

    Returns:
        The matrix entry for the pair, or -1 if either node id is -1.
    """
    if pickup_node_id == -1 or dropoff_node_id == -1:
        return -1
    return travel_time_matrix[pickup_node_id, dropoff_node_id]


# Travel time for each trip from the matrix; -1 when either endpoint has no matched node.
trips['travel_time'] = trips.apply(lambda row: get_travel_time(row['pickup_node_id'], row['dropoff_node_id'], travel_time_matrix), axis=1)

# Keep only trips with both endpoints matched and a non-negative travel time.
# .copy() makes the filtered result an independent frame, so the column
# assignments that follow do not write into a slice of the original
# (SettingWithCopyWarning).
trips = trips[(trips['pickup_node_id']>=0) & (trips['dropoff_node_id']>=0) & (trips['travel_time']>=0)].copy()

def merge_date_time(date_str, time_str):
    """Combine a date string and a time string into one ``datetime``.

    Args:
        date_str: Date formatted as ``YYYY-MM-DD``.
        time_str: Time formatted as ``H:MM:SS`` / ``HH:MM:SS``.

    Returns:
        ``datetime.datetime`` parsed from ``"<date_str> <time_str>"``.
    """
    stamp = f"{date_str} {time_str}"
    return dt.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")

# Combine each trip's separate date and time strings into one pickup datetime.
trips['pickup_datetime'] = trips.apply(lambda row: merge_date_time(row['date'], row['time']), axis=1)
# Drop-off = pickup + travel_time seconds; int() drops the fractional seconds.
trips['dropoff_datetime'] = trips.apply(lambda row: row['pickup_datetime'] + dt.timedelta(seconds=int(row['travel_time'])), axis=1)

# Disabled: window columns TIME_BUF minutes before pickup / after drop-off.
#trips['e_datetime'] = trips['pickup_datetime'].apply(lambda x: x - dt.timedelta(minutes=TIME_BUF))
#trips['l_datetime'] = trips['dropoff_datetime'].apply(lambda x: x + dt.timedelta(minutes=TIME_BUF))

trips.head()

Unnamed: 0,distance,time,date,pickup_lat,pickup_lon,dropoff_lat,dropoff_lon,passenger_types,am_wc,geom_4326_pickup,geom_4326_dropoff,geom_5070_pickup,geom_5070_dropoff,pickup_node_id,pickup_osmid,distance_to_pickup,dropoff_node_id,dropoff_osmid,distance_to_dropoff,travel_time,pickup_datetime,dropoff_datetime
0,9.76,4:15:00,2021-01-01,35.045644,-85.319982,35.022033,-85.241765,WH1,WC,POINT (-85.319982 35.045644),POINT (-85.241765 35.022033),POINT (964118.6371208868 1386139.680921921),POINT (971447.881122148 1384311.182383026),5839,202694168,1.755803,595,202579218,23.394279,551.921936,2021-01-01 04:15:00,2021-01-01 04:24:11
1,7.99,4:15:00,2021-01-01,35.032584,-85.316599,35.022087,-85.241776,AM1,AM,POINT (-85.316599 35.032584),POINT (-85.241776 35.022087),POINT (964586.6717501065 1384721.948728439),POINT (971446.2095713749 1384317.073218042),236,202546601,102.705864,595,202579218,28.68902,524.678406,2021-01-01 04:15:00,2021-01-01 04:23:44
2,0.87,4:20:00,2021-01-01,35.024964,-85.247725,35.022033,-85.241765,AM1,AM,POINT (-85.247725 35.024964),POINT (-85.241765 35.022033),POINT (970874.9783932989 1384576.122342002),POINT (971447.881122148 1384311.182383026),5897,202696273,45.253888,595,202579218,23.394279,64.347206,2021-01-01 04:20:00,2021-01-01 04:21:04
3,2.89,4:30:00,2021-01-01,35.015012,-85.218661,35.022087,-85.241776,AM1,AM,POINT (-85.218661 35.015012),POINT (-85.241776 35.022087),POINT (973614.1090200449 1383767.095398095),POINT (971446.2095713749 1384317.073218042),5586,202685290,51.297526,595,202579218,28.68902,277.149414,2021-01-01 04:30:00,2021-01-01 04:34:37
4,3.32,4:40:00,2021-01-01,35.047207,-85.236001,35.022033,-85.241765,AM1,AM,POINT (-85.236001 35.047207),POINT (-85.241765 35.022033),POINT (971647.777992402 1387168.774339245),POINT (971447.881122148 1384311.182383026),3053,202632234,28.193583,595,202579218,23.394279,242.706818,2021-01-01 04:40:00,2021-01-01 04:44:02


In [8]:
# Keep only the columns written to the processed file. .copy() makes the
# selection an independent frame so the assignments below do not operate on a
# slice of the previous one (SettingWithCopyWarning).
trips = trips[['distance', 'pickup_lat', 'pickup_lon', 'dropoff_lat', 'dropoff_lon', 'am_wc', 'pickup_node_id', 'pickup_osmid', 'dropoff_node_id', 'dropoff_osmid', 'travel_time', 'pickup_datetime', 'dropoff_datetime']].copy()

# Enforce the output dtypes in a single pass instead of one assignment per column.
# NOTE(review): astype(str) presumably turns a missing am_wc into the literal
# string 'nan' - confirm that is what downstream readers expect.
trips = trips.astype({
    'distance': float,
    'pickup_lat': float,
    'pickup_lon': float,
    'dropoff_lat': float,
    'dropoff_lon': float,
    'am_wc': str,
    'pickup_node_id': int,
    'pickup_osmid': int,
    'dropoff_node_id': int,
    'dropoff_osmid': int,
    'travel_time': float,
})

# Serialize the datetimes with an explicit, fixed format.
trips['pickup_datetime'] = trips['pickup_datetime'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
trips['dropoff_datetime'] = trips['dropoff_datetime'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
#trips['e_datetime'] = trips['e_datetime'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
#trips['l_datetime'] = trips['l_datetime'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))

file_path = os.path.join(os.getcwd(), "data", "CARTA", "processed", "para_transit_trips_2021.csv")
# Create the output folder on a fresh checkout so to_csv does not fail on a missing directory.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
trips.to_csv(file_path, index=False)
trips.head(2)

Unnamed: 0,distance,pickup_lat,pickup_lon,dropoff_lat,dropoff_lon,am_wc,pickup_node_id,pickup_osmid,dropoff_node_id,dropoff_osmid,travel_time,pickup_datetime,dropoff_datetime
0,9.76,35.045644,-85.319982,35.022033,-85.241765,WC,5839,202694168,595,202579218,551.921936,2021-01-01 04:15:00,2021-01-01 04:24:11
1,7.99,35.032584,-85.316599,35.022087,-85.241776,AM,236,202546601,595,202579218,524.678406,2021-01-01 04:15:00,2021-01-01 04:23:44
