## LADOT one trip - projecting distance for all vp

In [None]:
import geopandas as gpd
import pandas as pd

import create_table
import utils
from update_vars import analysis_date, PROJECT_CRS

In [None]:
# Single trip_id used to filter the tables loaded in this notebook.
one_trip = "30-n30kvaejc"

# Nested list-of-tuples filter handed to the create_table helpers via `filters`.
# NOTE(review): looks like the pyarrow/parquet DNF filter format -- confirm
# against create_table.get_table.
trip_filter = [[("trip_id", "==", one_trip)]]

# Load the "trips" table for analysis_date, restricted by trip_filter.
trips = create_table.get_table(
    "trips", 
    analysis_date, 
    filters = trip_filter
)

# `trips.shape_id` is used in a later cell to pick the matching shape(s).
trips

## Look at what's in all the tables

In [None]:
# "stop_times_direction" rows for the selected trip (trip_id, stop_sequence,
# geometry only), reprojected to PROJECT_CRS.
stop_times_direction = create_table.get_table(
    "stop_times_direction",
    analysis_date,
    filters = trip_filter,
    columns = ["trip_id", "stop_sequence", "geometry"]
).to_crs(PROJECT_CRS)


# "shapes" rows whose shape_id appears in `trips`, reprojected to PROJECT_CRS.
# NOTE(review): `trips.shape_id` is passed directly as the "in" value;
# presumably get_table accepts a column/Series there -- confirm, or pass a list.
shapes = create_table.get_table(
    "shapes",
    analysis_date,
    filters = [[("shape_id", "in", trips.shape_id)]],
    columns = ["shape_id", "geometry"]
).to_crs(PROJECT_CRS)

In [None]:
# Vehicle positions (vp) for the selected trip: reproject to PROJECT_CRS,
# order by timestamp, then renumber the index so row position i is the
# i-th observation in time.
vp_columns = ["trip_id", "location_timestamp_local", "geometry"]

vp = (
    create_table.get_table(
        "vp",
        analysis_date,
        filters=trip_filter,
        columns=vp_columns,
    )
    .to_crs(PROJECT_CRS)
    .sort_values("location_timestamp_local")
    .reset_index(drop=True)
)

vp.shape

In [None]:
# Preview the first rows of the timestamp-sorted vp.
vp.head()

In [None]:
# Plot vp, shape, and stops for the trip together, with vp_as_line=True.
# NOTE(review): presumably this draws the vp as one connected line rather than
# individual points -- confirm in utils.plot_vp_shape_stops.
m = utils.plot_vp_shape_stops(
    vp,
    shapes,
    stop_times_direction,
    vp_as_line=True
)

m

In [None]:
# Same inputs as the previous plot, but with vp_as_line=False.
# NOTE(review): presumably this draws each vp as its own point -- confirm in
# utils.plot_vp_shape_stops.
m2 = utils.plot_vp_shape_stops(
    vp,
    shapes,
    stop_times_direction,
    vp_as_line=False
)

m2

## Put stop_times, trips, stops, shapes tables together

In [None]:
# Projected stop_times table for the selected trip.
# The demo further down reads its `stop_sequence` and `stop_meters` columns.
stops_projected = create_table.stop_times_projected_table(
    analysis_date, 
    filters = trip_filter,
)

In [None]:
# Preview the first rows of the projected stops.
stops_projected.head()

## Put vp with shape

In [None]:
# Projected vp table for the selected trip.
# The demo further down reads its `vp_meters` column and its index.
vp_projected = create_table.vp_projected_table(
    analysis_date,
    filters = trip_filter
)   

In [None]:
# Preview the first rows of the projected vp.
vp_projected.head()

## Demo: find speed between 2 stops

Find the speed between stop_sequence 5 and 6.

In [None]:
def stops_and_vp_between_two_stops(
    start_stop: int,
    end_stop: int,
    stops_df: "pd.DataFrame | None" = None,
    vp_df: "pd.DataFrame | None" = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Select the stops in a stop_sequence range and the vp lying between them.

    Stops are kept when `start_stop <= stop_sequence <= end_stop`. Vehicle
    positions are kept when their `vp_meters` falls inside the closed
    interval spanned by the smallest and largest `stop_meters` of those
    stops, so the result does not depend on which stop has the larger
    `stop_meters` value.

    Args:
        start_stop: first stop_sequence to include.
        end_stop: last stop_sequence to include.
        stops_df: table with `stop_sequence` and `stop_meters` columns.
            Defaults to the notebook-level `stops_projected`.
        vp_df: table with a `vp_meters` column.
            Defaults to the notebook-level `vp_projected`.

    Returns:
        (stops_subset, vp_subset). Both keep the index labels of their
        source tables. If no stop matches, both are empty: the min/max of
        an empty column is NaN and every comparison against NaN is False.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    if stops_df is None:
        stops_df = stops_projected
    if vp_df is None:
        vp_df = vp_projected

    stops_projected_subset = stops_df.loc[
        (stops_df.stop_sequence >= start_stop) &
        (stops_df.stop_sequence <= end_stop)
    ]

    # Compute the distance bounds once rather than inside the boolean mask.
    min_meters = stops_projected_subset.stop_meters.min()
    max_meters = stops_projected_subset.stop_meters.max()

    vp_projected_subset = vp_df.loc[
        (vp_df.vp_meters >= min_meters) &
        (vp_df.vp_meters <= max_meters)
    ]

    return stops_projected_subset, vp_projected_subset

In [None]:
# Stops with stop_sequence 5 through 6, and the vp whose vp_meters falls
# between those stops' stop_meters.
stops_subset, vp_subset = stops_and_vp_between_two_stops(5, 6)

In [None]:
# Index labels of the selected vp; a later cell passes these to vp.iloc to
# choose the rows to map.
vp_subset.index

In [None]:
# Stops selected for stop_sequence 5 through 6.
stops_subset

In [None]:
# vp selected between stop_sequence 5 and 6.
vp_subset

In [None]:
# Repeat the demo for stop_sequence 49 through 50.
stops_subset2, vp_subset2 = stops_and_vp_between_two_stops(49, 50)
# Show the stops for this second pair, not the earlier 5-6 result.
stops_subset2

In [None]:
# vp selected between stop_sequence 49 and 50.
vp_subset2

In [None]:
# Map only the vp and stops involved in the stop_sequence 5-6 demo.
# NOTE(review): vp_subset.index holds labels from vp_projected but is used
# positionally on vp here; this assumes both tables share the same row
# numbering -- confirm.
m3_vp = vp.iloc[vp_subset.index]
m3_stops = stop_times_direction[
    stop_times_direction.stop_sequence.between(5, 6)
]

m3 = utils.plot_vp_shape_stops(m3_vp, shapes, m3_stops, vp_as_line=False)

m3

In [None]:
# Map only the vp and stops involved in the stop_sequence 49-50 demo.
# NOTE(review): vp_subset2.index holds labels from vp_projected but is used
# positionally on vp here; this assumes both tables share the same row
# numbering -- confirm.
m4_vp = vp.iloc[vp_subset2.index]
m4_stops = stop_times_direction[
    stop_times_direction.stop_sequence.between(49, 50)
]

m4 = utils.plot_vp_shape_stops(m4_vp, shapes, m4_stops, vp_as_line=False)

m4

## Methodology
* Project each stop position and vehicle position onto shape
* Use that to find distance and time elapsed
* Speed can be calculated between stops

### Real World Complexities
* For this one trip, there are, for the most part, not that many vp between each stop. Are we saving much time if we filter them out anyway?
   * It depends on the framework of what you use to define a segment.
   * A stop-to-stop segment is fairly finite. But what if we move to corridors? Each city block?
   * If we start combining trips across multiple operators that travel along the same street, how do we go about filtering efficiently without calculating every delta there is, and using only a fraction of those to calculate what we're interested in?
* Ideally, the meters progressed increases monotonically, though that's not true for about 1/3 of the routes, where looping or inlining occurs. If a bus doubles back along any portion of the shape (going one way along a major street, then back along it; exiting a plaza), then `vp_meters` can actually decrease for a bit without being incorrect.
   * We need an additional data processing step. This is why `stop_times_direction` was created: we want to know what a stop's primary direction of travel is.
   * We should add something similar to vp.
   * If a vp isn't moving, the `vp_primary_direction="Unknown"`, and actually that helps us get at dwell times too.
   * This is not a dwell time at a stop necessarily, but how many vp observations did we capture without the bus moving (aka traffic). 
   * For a single day, for all operators with RT, this narrows down the rows from 15M to 12M (so that's a nice chunk that we can roll-up!)
   * Nearest neighbors will help us