In [None]:
import branca 
import folium
import geopandas as gpd
import numpy as np
import pandas as pd

import create_table
import utils
import neighbor
from update_vars import analysis_date, PARTRIDGE_FOLDER

In [None]:
# Look at a single trip; the same filter is reused further down when the
# segments parquet is read.
one_trip = "30-046wc7nmq"
trip_filter = [[("trip_id", "==", one_trip)]]

# Stop times joined with vehicle positions, restricted to `one_trip`.
gdf = create_table.stop_times_with_vp_table(analysis_date, filters=trip_filter)

In [None]:
# Base layer: stops, colored by stop_sequence.
stops_subset = gdf[["stop_sequence", "stop_geometry"]]
stops_vp_map = stops_subset.explore(
    "stop_sequence",
    categorical=True,
    legend=False,
    tiles="CartoDB Positron",
    name="stops",
)

# Overlay layers drawn onto the same map, in this order:
# (id column used for coloring, geometry column, layer name).
overlay_layers = [
    ("trip_id", "vp_geometry", "vp"),
    ("shape_id", "shape_geometry", "shape"),
]

for id_col, geom_col, layer_name in overlay_layers:
    layer_gdf = gdf[[id_col, geom_col]].drop_duplicates().set_geometry(geom_col)
    # Passing m= adds the layer to the existing map instead of creating a new one.
    layer_gdf.explore(id_col, m=stops_vp_map, categorical=True, name=layer_name)

folium.LayerControl().add_to(stops_vp_map)
stops_vp_map

In [None]:
# Need a single column for trip across operators.
# Technique from:
# https://datascientyst.com/combine-multiple-columns-into-single-one-in-pandas/
trip_cols = ["schedule_gtfs_dataset_key", "trip_id"]

# Cast the key columns to str so they can be joined with "__" row by row.
trip_key_parts = gdf[trip_cols].astype(str)

gdf = gdf.assign(trip_instance_key=trip_key_parts.T.agg("__".join))

In [None]:
# Preview the first rows, now including the trip_instance_key column.
gdf.head()

In [None]:
# Wrap the helper so it is called once per row with that row's values.
# NOTE(review): np.vectorize is a convenience loop, not a performance
# optimization.
nearest_two_vp = np.vectorize(neighbor.two_nearest_neighbor_near_stop)

# Four parallel arrays come back: the vp index before / after each stop and the
# corresponding meter values.
vp_before, vp_after, vp_before_meters, vp_after_meters = nearest_two_vp(
    gdf.vp_primary_direction,
    gdf.vp_geometry,
    gdf.vp_idx,
    gdf.stop_geometry,
    gdf.stop_opposite_direction,
    gdf.shape_geometry,
    gdf.stop_meters,
)

In [None]:
# Attach the nearest-neighbor results as columns, one per returned array.
neighbor_columns = {
    "prior_vp_idx": vp_before,
    "subseq_vp_idx": vp_after,
    "prior_vp_meters": vp_before_meters,
    "subseq_vp_meters": vp_after_meters,
}

gdf = gdf.assign(**neighbor_columns)

In [None]:
# Spot-check: the stop's meter value next to the prior / subsequent vp meters.
cols = ["stop_seq_pair", "stop_meters", "prior_vp_meters", "subseq_vp_meters"]

gdf[cols].head(2)

In [None]:
# One (start, end) pair per row, looked up from the prior / subsequent vp
# indices against that row's vp_idx and location_timestamp_local values.
vp_timestamp_pairs = [
    neighbor.grab_vp_timestamp(
        row.prior_vp_idx,
        row.subseq_vp_idx,
        row.vp_idx,
        row.location_timestamp_local,
    )
    for row in gdf.itertuples()
]

# Split the pairs into two parallel lists, in row order.
start_time_series = [start_time for start_time, _end in vp_timestamp_pairs]
end_time_series = [end_time for _start, end_time in vp_timestamp_pairs]

In [None]:
# Store the bracketing timestamps alongside each stop row.
timestamp_columns = {
    "start_local_timestamp": start_time_series,
    "end_local_timestamp": end_time_series,
}

gdf = gdf.assign(**timestamp_columns)

In [None]:
# Interpolate an arrival time per stop from the two bracketing vp points.
# Rows where either neighbor index is -1 get NaN instead.
# NOTE(review): -1 is presumably the "no neighbor found" sentinel from the
# neighbor module; confirm there.
interpolated_arrival_series = [
    np.nan
    if (row.prior_vp_idx == -1) or (row.subseq_vp_idx == -1)
    else neighbor.interpolate_stop_arrival_time(
        row.stop_meters,
        [row.prior_vp_meters, row.subseq_vp_meters],
        [row.start_local_timestamp, row.end_local_timestamp],
    )
    for row in gdf.itertuples()
]

In [None]:
# Add the interpolated arrivals via assign(), matching the other cells, so
# the existing frame is not mutated in place.
gdf = gdf.assign(arrival_time=interpolated_arrival_series)

In [None]:
# Inspect rows that received an arrival time, with each interpolated value
# shown between its bracketing meters / timestamps.
has_arrival = gdf.arrival_time.notna()
arrival_preview_cols = [
    "prior_vp_meters", "stop_meters", "subseq_vp_meters",
    "start_local_timestamp", "arrival_time", "end_local_timestamp",
]

gdf[has_arrival][arrival_preview_cols].head()

In [None]:
# Intermediate columns removed once arrival times have been post-processed.
drop_cols = [
    "stop_opposite_direction",
    "vp_geometry",
    "vp_idx",
    "location_timestamp_local",
    "vp_primary_direction",
    "shape_geometry",
    "prior_vp_idx",
    "subseq_vp_idx",
    "prior_vp_meters",
    "subseq_vp_meters",
    "start_local_timestamp",
    "end_local_timestamp",
]

In [None]:
# `trip_stop_cols` was never defined in this notebook, so this cell raised
# NameError on a fresh kernel. Define it here with the same value that is
# passed as `trip_stop_cols=` to calculate_speed_from_stop_arrivals below.
trip_stop_cols = ["trip_instance_key", "stop_sequence"]

# Post-process arrival times across stops, then drop the intermediate columns.
# NOTE(review): the helper's exact behavior lives in the neighbor module and
# is not visible here.
gdf2 = neighbor.enforce_monotonicity_and_interpolate_across_stops(
    gdf, trip_stop_cols
).drop(columns = drop_cols)

In [None]:
# Grouping columns handed to the speed helper: one key per trip, and that key
# plus stop_sequence per stop.
speed_grouping = {
    "trip_cols": ["trip_instance_key"],
    "trip_stop_cols": ["trip_instance_key", "stop_sequence"],
}

speeds = neighbor.calculate_speed_from_stop_arrivals(gdf2, **speed_grouping)

In [None]:
# (rows, columns) of the speeds table before any filtering.
speeds.shape

In [None]:
# Compare with the unfiltered shape: keep only rows whose speed is present
# and below infinity.
speed_present = speeds.speed_mph.notna()
speed_below_inf = speeds.speed_mph < np.inf

speeds[speed_present & speed_below_inf].shape

In [None]:
# Segment geometries, read with the same single-trip filter used for gdf.
segments_path = f"{PARTRIDGE_FOLDER}LADOT/segments.parquet"

segments = gpd.read_parquet(segments_path, filters=trip_filter)

In [None]:
# Only rows with positive elapsed seconds are joined onto the segments.
speeds_positive_elapsed = speeds[speeds.sec_elapsed > 0]

# Default (inner) merge: segments without a matching speed row are dropped.
segment_merge_keys = ["trip_id", "stop_id1", "stop_id2"]
speed_gdf = pd.merge(segments, speeds_positive_elapsed, on=segment_merge_keys)

In [None]:
# Sanity check: largest segment speed, to compare against the map's color scale.
speed_gdf.speed_mph.max()

In [None]:
# Step colormap rescaled to the 0-30 range; speed_mph is mapped through it.
COLORSCALE = branca.colormap.step.RdBu_10.scale(vmin=0, vmax=30)

# NOTE(review): arrival_time is dropped before mapping, presumably because
# timestamp columns do not serialize for the folium map; confirm.
speed_map_gdf = speed_gdf.drop(columns="arrival_time")

speed_map_gdf.explore("speed_mph", cmap=COLORSCALE, tiles="CartoDB Positron")