In [1]:
import branca 
import geopandas as gpd
import numpy as np
import pandas as pd

import create_table
import utils
from update_vars import analysis_date, PROJECT_CRS, PARTRIDGE_FOLDER

In [None]:
stops = gpd.read_parquet(f"{PARTRIDGE_FOLDER}stops_to_segments.parquet")

In [None]:
one_trip = "30-0ws74x7tc"
loopy_trip = '183-04vq6dp71'
#trip_filter = [[("trip_id", "==", one_trip)]]

ladot_trips = create_table.get_table(
    "trips", 
    analysis_date, 
    #filters = trip_filter
)

ladot_trips.head(2)

In [None]:
trips_in_common = list(
    set(stops.trip_id).intersection(set(ladot_trips.trip_id))
)

In [None]:
trips_in_common[0]

In [2]:
one_trip = "30-046wc7nmq"
trip_filter = [[("trip_id", "==", one_trip)]]

trips = create_table.get_table(
    "trips", 
    analysis_date, 
    filters = trip_filter
)

trips.head(2)

Unnamed: 0,gtfs_dataset_key,name,regional_feed_type,service_date,trip_start_date_pacific,trip_id,trip_instance_key,route_key,route_id,route_type,...,direction_id,shape_array_key,shape_id,trip_first_departure_datetime_pacific,trip_last_arrival_datetime_pacific,service_hours,trip_start_date_local_tz,trip_first_departure_datetime_local_tz,trip_last_arrival_datetime_local_tz,schedule_gtfs_dataset_key
0,cc53a0dbf5df90e3009b9cb5d89d80ba,LA DOT Schedule,,2024-10-16,2024-10-16,30-046wc7nmq,3ac076a16121669d9e2cb5eb2952e092,ac4285990f7b0d6b721a613274fcf375,576,3,...,1.0,a174661805ed0e51d4db2fc5de1a9b3c,14576,2024-10-16 19:00:00,2024-10-16 19:40:00,0.666667,2024-10-16,2024-10-16 19:00:00,2024-10-16 19:40:00,cc53a0dbf5df90e3009b9cb5d89d80ba


In [3]:
shapes = create_table.get_table(
    "shapes", 
    analysis_date, 
    filters = [[("shape_id", "in", trips.shape_id)]],
    columns = ["shape_id", "geometry"]
).to_crs(PROJECT_CRS)

shapes.head(2)

Unnamed: 0,shape_id,geometry
0,14576,"LINESTRING (153598.371 -443508.560, 153637.445..."


In [4]:
trip_cols = ["schedule_gtfs_dataset_key", "trip_id"]

stops_projected = create_table.stop_times_projected_table(
    analysis_date, 
    filters = trip_filter,
)


# We don't do this in our pipeline, because vp meters is an array
# But this illustrates the point more clearly
stops_projected = stops_projected.assign(
    subseq_stop_meters = stops_projected.groupby(trip_cols).stop_meters.shift(-1)
).rename(columns = {"stop_id": "stop_id1"})

In [5]:
import neighbor

In [6]:
stops_condensed = utils.condense_by_trip(
    stops_projected, 
    group_cols = ["schedule_gtfs_dataset_key", "trip_id"],
    sort_cols = ["schedule_gtfs_dataset_key", "trip_id", "stop_sequence"],
    geometry_col = "geometry",
    array_cols = ["stop_sequence"]
)

current_stop_geom = np.array(stops_condensed.geometry.iloc[0].coords)
next_stop_geom = current_stop_geom[1:]
# distance_east, distance_north
direction_arr = np.asarray(
    # first value is unknown because there is no prior stop to compare to
    ["Unknown"] + 
    [utils.cardinal_definition_rules(pt[0] - prior_pt[0], pt[1] - prior_pt[1]) 
     for pt, prior_pt
     in zip(next_stop_geom, current_stop_geom)]
)

stops_projected["stop_primary_direction"] = direction_arr
stops_projected["stop_opposite_direction"] = stops_projected.stop_primary_direction.map(
    neighbor.OPPOSITE_DIRECTIONS)

In [7]:
vp_projected = create_table.vp_projected_table(
    analysis_date,
    filters = trip_filter
)   

In [8]:
vp_nn = utils.condense_by_trip(
    vp_projected,
    group_cols = ["schedule_gtfs_dataset_key", "trip_id"],
    sort_cols = ["schedule_gtfs_dataset_key", "trip_id", "vp_idx"],
    geometry_col = "geometry",
    array_cols = ["vp_idx", "location_timestamp_local"]
)


current_stop_geom = np.array(vp_nn.geometry.iloc[0].coords)
next_stop_geom = current_stop_geom[1:]
# distance_east, distance_north
direction_arr = np.asarray(
    # first value is unknown because there is no prior stop to compare to
    ["Unknown"] + 
    [utils.cardinal_definition_rules(pt[0] - prior_pt[0], pt[1] - prior_pt[1]) 
     for pt, prior_pt
     in zip(next_stop_geom, current_stop_geom)]
)
vp_nn["vp_primary_direction"] = [direction_arr]

#vp_projected["vp_primary_direction"] = direction_arr

In [9]:
gdf = pd.merge(
    stops_projected.rename(columns = {"geometry": "stop_geometry"}),
    vp_nn.rename(columns = {"geometry": "vp_geometry"}),
    on = ["schedule_gtfs_dataset_key", "trip_id"],
    how = "inner"
).merge(
    shapes.rename(columns = {"geometry": "shape_geometry"}),
    on = "shape_id",
    how = "inner"
)

In [10]:
gdf.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,trip_id,stop_id1,stop_sequence,stop_geometry,shape_id,stop_meters,stop_id2,stop_seq_pair,subseq_stop_meters,stop_primary_direction,stop_opposite_direction,vp_geometry,vp_idx,location_timestamp_local,vp_primary_direction,shape_geometry
0,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,297696,0,POINT (153598.371 -443508.560),14576,1.598321e-09,305821,0__1,663.423266,Unknown,,"LINESTRING (153200.032 -441113.419, 153200.944...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[2024-10-16 19:08:21, 2024-10-16 19:08:41, 202...","[Unknown, Eastbound, Westbound, Eastbound, Sou...","LINESTRING (153598.371 -443508.560, 153637.445..."
1,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305821,1,POINT (153786.685 -443168.548),14576,663.4233,305822,1__2,1054.75581,Northbound,Southbound,"LINESTRING (153200.032 -441113.419, 153200.944...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[2024-10-16 19:08:21, 2024-10-16 19:08:41, 202...","[Unknown, Eastbound, Westbound, Eastbound, Sou...","LINESTRING (153598.371 -443508.560, 153637.445..."


In [11]:
vp_nn[["trip_id", "geometry"]].explore("trip_id", tiles = "CartoDB Positron")

In [12]:
vp_before, vp_after, vp_before_meters, vp_after_meters = np.vectorize(
    neighbor.two_nearest_neighbor_near_stop
)(
    gdf.vp_primary_direction, 
    gdf.vp_geometry, 
    gdf.vp_idx,
    gdf.stop_geometry,
    gdf.stop_opposite_direction,
    gdf.shape_geometry,
    gdf.stop_meters
)

In [24]:
import importlib
importlib.reload(neighbor)

<module 'neighbor' from '/home/jovyan/gtfs-rt-demo/scripts/neighbor.py'>

In [13]:
gdf = gdf.assign(
    prior_vp_idx = vp_before,
    subseq_vp_idx = vp_after,
    prior_vp_meters = vp_before_meters, 
    subseq_vp_meters = vp_after_meters
)

In [None]:
test_gdf = gdf[
    (gdf.prior_vp_idx != -1) & 
    (gdf.subseq_vp_idx != -1)
].head(5)

In [None]:
test_gdf.dtypes

In [25]:
start_time_series = []
end_time_series = []

for row in gdf.itertuples():
    start_time, end_time = neighbor.grab_vp_timestamp(
        getattr(row, "prior_vp_idx"),
        getattr(row, "subseq_vp_idx"),
        getattr(row, "vp_idx"),
        getattr(row, "location_timestamp_local"),
    )
    
    start_time_series.append(start_time)
    end_time_series.append(end_time)

In [35]:
gdf = gdf.assign(
    start_local_timestamp = start_time_series,
    end_local_timestamp = end_time_series
)

gdf = gdf.assign(
    sec_elapsed = (gdf.end_local_timestamp - gdf.start_local_timestamp).divide(
                   np.timedelta64(1, 's')).abs(),
    meters_elapsed = gdf.subseq_vp_meters - gdf.prior_vp_meters  
)

speed_series = utils.calculate_speed(gdf.meters_elapsed, gdf.sec_elapsed)

gdf = gdf.assign(
    speed = speed_series
)

In [37]:
gdf

Unnamed: 0,schedule_gtfs_dataset_key,trip_id,stop_id1,stop_sequence,stop_geometry,shape_id,stop_meters,stop_id2,stop_seq_pair,subseq_stop_meters,...,shape_geometry,prior_vp_idx,subseq_vp_idx,prior_vp_meters,subseq_vp_meters,start_local_timestamp,end_local_timestamp,sec_elapsed,meters_elapsed,speed
0,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,297696,0,POINT (153598.371 -443508.560),14576,1.598321e-09,305821.0,0__1,663.423266,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,12,0,3910.624177,NaT,2024-10-16 19:12:21,,3910.624177,
1,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305821,1,POINT (153786.685 -443168.548),14576,663.4233,305822.0,1__2,1054.75581,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,13,0,4002.044696,NaT,2024-10-16 19:12:41,,4002.044696,
2,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305822,2,POINT (153777.139 -442777.536),14576,1054.756,305823.0,2__3,1478.099137,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,13,0,4002.044696,NaT,2024-10-16 19:12:41,,4002.044696,
3,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305823,3,POINT (153769.457 -442355.980),14576,1478.099,305824.0,3__4,1877.550086,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,13,0,4002.044696,NaT,2024-10-16 19:12:41,,4002.044696,
4,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305824,4,POINT (153737.573 -441974.587),14576,1877.55,305825.0,4__5,2056.055828,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,13,0,4002.044696,NaT,2024-10-16 19:12:41,,4002.044696,
5,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305825,5,POINT (153559.155 -441978.585),14576,2056.056,305826.0,5__6,2346.881221,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,12,0,3910.624177,NaT,2024-10-16 19:12:21,,3910.624177,
6,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305826,6,POINT (153268.379 -441983.902),14576,2346.881,305827.0,6__7,2765.718086,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,5,0,3157.481,NaT,2024-10-16 19:10:01,,3157.481,
7,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305827,7,POINT (153250.392 -441579.054),14576,2765.718,305829.0,7__8,3474.866449,...,"LINESTRING (153598.371 -443508.560, 153637.445...",-1,6,0,3538.18959,NaT,2024-10-16 19:10:17,,3538.18959,
8,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305828,9,POINT (153215.467 -441209.004),14576,3474.866,305830.0,9__10,3289.779537,...,"LINESTRING (153598.371 -443508.560, 153637.445...",3,6,3252,3538.18959,2024-10-16 19:09:17,2024-10-16 19:10:17,60.0,286.18959,10.670102
9,cc53a0dbf5df90e3009b9cb5d89d80ba,30-046wc7nmq,305829,8,POINT (153188.368 -441127.713),14576,3289.78,305828.0,8__9,3629.0701,...,"LINESTRING (153598.371 -443508.560, 153637.445...",4,-1,3158,0.0,2024-10-16 19:09:41,NaT,,-3158.0,
